Files
tustu/generate_smart_mapping.py
johndoe6345789 db7e396d34 Enhance filename generation logic for Java classes
- Improved filtering of field types, imported classes, instantiated classes, method parameter types, and return types to exclude Java primitives, common types, and keywords.
- Added semantic analysis for generating more meaningful class names based on field names, instantiated classes, and method names.
- Introduced a contextual naming strategy that incorporates various characteristics of the class, such as technology context (networking, I/O, threading), field types, and method names.
- Implemented a global uniqueness check for generated filenames to prevent conflicts across packages.
- Updated the fallback naming strategy to ensure uniqueness while maintaining descriptiveness.
- Enhanced logging for duplicate name detection and resolution.
2026-01-11 19:55:57 +00:00

874 lines
38 KiB
Python

#!/usr/bin/env python3
"""
Smart Java file renaming with conflict resolution and creative naming.
Uses actual class names and context to generate unique, descriptive filenames.
"""
import os
import sys
import json
import re
from collections import defaultdict
def extract_class_info(filepath, content):
"""Extract class name and key characteristics from Java content"""
info = {
'class_name': None,
'is_interface': False,
'is_abstract': False,
'is_enum': False,
'extends': None,
'implements': [],
'key_methods': [],
'field_types': [],
'field_names': [],
'string_literals': [],
'imported_classes': [],
'instantiated_classes': [],
'description': None,
'specific_name': None
}
# Extract class/interface/enum declaration
class_match = re.search(r'public\s+(?:(abstract|final)\s+)?(class|interface|enum)\s+(\w+)', content)
if class_match:
modifier, type_keyword, class_name = class_match.groups()
info['class_name'] = class_name
info['is_interface'] = (type_keyword == 'interface')
info['is_enum'] = (type_keyword == 'enum')
info['is_abstract'] = (modifier == 'abstract')
# Extract extends
extends_match = re.search(r'extends\s+(\w+)', content)
if extends_match:
info['extends'] = extends_match.group(1)
# Extract implements
implements_match = re.findall(r'implements\s+([\w,\s]+)', content)
if implements_match:
for impl in implements_match:
info['implements'].extend([i.strip() for i in impl.split(',')])
# Extract ALL public methods (not just first 5)
methods = re.findall(r'public\s+(?:static\s+)?(?:synchronized\s+)?\w+\s+(\w+)\s*\(', content)
info['key_methods'] = methods
# Extract field types to understand what this class manages
field_types = re.findall(r'(?:private|protected|public)\s+(?:static\s+)?(?:final\s+)?(\w+)\s+\w+\s*[;=]', content)
# Filter out primitives and common types AND Java keywords
java_primitives_and_keywords = ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object',
'char', 'byte', 'short', 'void', 'class', 'interface', 'extends', 'implements']
info['field_types'] = [f for f in field_types if len(f) > 2 and f not in java_primitives_and_keywords][:10]
# Extract field names for context
field_names = re.findall(r'(?:private|protected|public)\s+(?:static\s+)?(?:final\s+)?\w+\s+(\w+)\s*[;=]', content)
info['field_names'] = [f for f in field_names if len(f) > 1][:10]
# Extract string literals for domain understanding
string_literals = re.findall(r'"([^"]{3,40})"', content) # 3-40 char strings
info['string_literals'] = string_literals[:10]
# Extract imported classes (just the class name, not full package)
imports = re.findall(r'import\s+[\w.]+\.(\w+);', content)
# Filter out classes with numbers (like Base64, CRC32, etc.)
info['imported_classes'] = [i for i in imports if len(i) > 2 and not any(c.isdigit() for c in i)][:15]
# Extract instantiated classes (new ClassName())
instantiations = re.findall(r'new\s+(\w+)\s*\(', content)
# Filter out primitives, wrappers, and classes with numbers
info['instantiated_classes'] = [i for i in instantiations
if len(i) > 2
and i not in ['int', 'Integer', 'long', 'Long',
'boolean', 'Boolean', 'double', 'Double',
'float', 'Float', 'byte', 'Byte',
'short', 'Short', 'char', 'Character']
and not any(c.isdigit() for c in i)][:15]
# Extract method parameter types for more context
method_params = re.findall(r'\w+\s+\w+\s*\(([^)]*)\)', content)
param_types = []
java_primitives_and_keywords = ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object',
'char', 'byte', 'short', 'void', 'class', 'interface']
for params in method_params:
if params.strip():
types = re.findall(r'(\w+)\s+\w+', params)
param_types.extend([t for t in types if len(t) > 2 and t not in java_primitives_and_keywords])
info['param_types'] = list(set(param_types))[:10]
# Extract method return types
return_types = re.findall(r'(?:public|private|protected)\s+(?:static\s+)?(\w+)\s+\w+\s*\(', content)
java_primitives_and_keywords = ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object',
'char', 'byte', 'short', 'void', 'class', 'interface']
info['return_types'] = [r for r in return_types if len(r) > 2 and r not in java_primitives_and_keywords][:10]
# Look for specific patterns in method bodies
info['uses_thread'] = 'Thread' in content or 'Runnable' in content or 'ExecutorService' in content
info['uses_io'] = 'InputStream' in content or 'OutputStream' in content or 'Reader' in content or 'Writer' in content
info['uses_network'] = 'Socket' in content or 'ServerSocket' in content or 'URL' in content or 'HttpURLConnection' in content
info['uses_swing'] = 'JFrame' in content or 'JPanel' in content or 'JButton' in content or 'JTable' in content
info['uses_collection'] = 'List' in content or 'Map' in content or 'Set' in content
info['uses_serialization'] = 'Serializable' in content or 'ObjectInputStream' in content or 'ObjectOutputStream' in content
# Try to infer specific name from content
specific_name = None
# Special handling for ActionListener - look at body content first
if 'ActionListener' in info['implements']:
# Extract more context from the code body
content_lower = content.lower()
# Look for method calls to understand the action
method_calls = re.findall(r'\.(\w+)\(', content)
meaningful_calls = [m for m in method_calls if len(m) > 3 and m not in ['this', 'super', 'toString', 'equals', 'hashCode', 'getClass']]
# Look for field assignments to understand what's being modified
field_assignments = re.findall(r'this\.(\w+)\s*=', content)
meaningful_fields = [f for f in field_assignments if len(f) > 1]
# Look for specific patterns with more context
if '.dispose()' in content:
# Check what's being disposed
if 'dialog' in content_lower:
if 'cancel' in content_lower or 'close' in content_lower:
specific_name = 'CancelDialogAndCloseListener'
else:
specific_name = 'DisposeDialogListener'
elif 'window' in content_lower or 'frame' in content_lower:
specific_name = 'CloseWindowListener'
else:
specific_name = 'DisposeListener'
# RadioButton with specific context
elif 'JRadioButton' in content:
if 'rating' in content_lower or 'feedback' in content_lower:
specific_name = 'RatingSelectionRadioButtonListener'
elif 'parse' in content_lower or 'Integer.parseInt' in content:
specific_name = 'ParseIntegerFromRadioButtonListener'
elif meaningful_fields:
specific_name = f"Update{meaningful_fields[0].capitalize()}FromRadioButtonListener"
else:
specific_name = 'RadioButtonSelectionListener'
# CheckBox with specific context
elif 'JCheckBox' in content:
if 'enable' in content_lower or 'disable' in content_lower:
specific_name = 'ToggleEnableStateCheckBoxListener'
elif 'visible' in content_lower or 'visibility' in content_lower:
specific_name = 'ToggleVisibilityCheckBoxListener'
elif meaningful_fields:
specific_name = f"Update{meaningful_fields[0].capitalize()}CheckBoxListener"
else:
specific_name = 'CheckBoxStateChangeListener'
# MenuItem with context
elif 'JMenuItem' in content or 'MenuItem' in content:
if 'open' in content_lower:
specific_name = 'OpenMenuItemListener'
elif 'save' in content_lower:
specific_name = 'SaveMenuItemListener'
elif 'exit' in content_lower or 'quit' in content_lower:
specific_name = 'ExitMenuItemListener'
else:
specific_name = 'MenuItemActionListener'
# Dialog patterns with more specificity
elif ('JDialog' in content or 'Dialog' in content) and content.count('Dialog') > 1:
if 'submit' in content_lower or 'ok' in content_lower:
if 'validation' in content_lower or 'validate' in content_lower:
specific_name = 'ValidateAndSubmitDialogListener'
else:
specific_name = 'SubmitDialogListener'
elif 'cancel' in content_lower:
specific_name = 'CancelDialogListener'
elif 'apply' in content_lower:
specific_name = 'ApplyDialogChangesListener'
else:
specific_name = 'DialogActionListener'
# Frame actions
elif ('JFrame' in content or 'Frame' in content) and content.count('Frame') > 1:
if 'minimize' in content_lower:
specific_name = 'MinimizeFrameListener'
elif 'maximize' in content_lower:
specific_name = 'MaximizeFrameListener'
else:
specific_name = 'FrameActionListener'
# Button actions
elif ('JButton' in content or 'Button' in content) and content.count('Button') > 2:
if 'browse' in content_lower:
specific_name = 'BrowseButtonClickListener'
elif 'search' in content_lower:
specific_name = 'SearchButtonClickListener'
else:
specific_name = 'ButtonClickActionListener'
# Look for method calls that indicate purpose (more specific)
if not specific_name:
if 'setEnabled' in meaningful_calls:
specific_name = 'ToggleComponentEnabledStateListener'
elif 'setVisible' in meaningful_calls:
specific_name = 'ToggleComponentVisibilityListener'
elif 'setText' in meaningful_calls:
specific_name = 'UpdateTextFieldListener'
elif 'setSelected' in meaningful_calls:
specific_name = 'UpdateSelectionStateListener'
elif any(m.startswith('show') for m in meaningful_calls):
show_methods = [m for m in meaningful_calls if m.startswith('show')]
specific_name = f"{show_methods[0].capitalize()}Listener"
elif any(m.startswith('open') for m in meaningful_calls):
open_methods = [m for m in meaningful_calls if m.startswith('open')]
specific_name = f"{open_methods[0].capitalize()}Listener"
elif any(m.startswith('start') for m in meaningful_calls):
start_methods = [m for m in meaningful_calls if m.startswith('start')]
specific_name = f"{start_methods[0].capitalize()}Listener"
elif meaningful_calls and len(meaningful_calls) > 0:
# Use the most prominent method call
specific_name = f"{meaningful_calls[0].capitalize()}ActionListener"
# Fallback to generic ActionListener
if not specific_name:
specific_name = 'GenericActionListener'
# Check for OTHER listener/callback patterns with specific event types (not ActionListener)
if not specific_name:
for impl in info['implements']:
if impl != 'ActionListener' and ('Listener' in impl or 'Callback' in impl):
# Extract the specific type (e.g., "DeviceUpdate" from "DeviceUpdateListener")
specific = impl.replace('Listener', '').replace('Callback', '').replace('Handler', '')
if len(specific) > 2:
specific_name = f"{specific}Listener"
break
# Look for dominant field types (what this class manages)
if not specific_name and info['field_types']:
type_counts = {}
for ft in info['field_types']:
if ft not in ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object']:
type_counts[ft] = type_counts.get(ft, 0) + 1
if type_counts:
dominant_type = max(type_counts.items(), key=lambda x: x[1])[0]
if len(dominant_type) > 2:
# Add technology context
if info.get('uses_network') and 'Socket' not in dominant_type:
specific_name = f"Network{dominant_type}Manager"
elif info.get('uses_io') and 'File' in content:
specific_name = f"File{dominant_type}Manager"
elif info.get('uses_thread'):
specific_name = f"Threaded{dominant_type}Manager"
elif type_counts[dominant_type] > 1:
specific_name = f"Multiple{dominant_type}Manager"
else:
specific_name = f"{dominant_type}Manager"
# Analyze field names for semantic meaning
if not specific_name and info['field_names']:
meaningful_fields = [f for f in info['field_names'] if len(f) > 3 and f not in ['this', 'that', 'temp', 'tmp', 'value', 'data']]
if meaningful_fields:
# Look for domain-specific field patterns
field_str = ' '.join(meaningful_fields).lower()
if 'connection' in field_str or 'socket' in field_str:
specific_name = 'ConnectionManager'
elif 'cache' in field_str or 'buffer' in field_str:
specific_name = 'CacheManager'
elif 'config' in field_str or 'setting' in field_str:
specific_name = 'ConfigurationManager'
elif 'logger' in field_str or 'logging' in field_str:
specific_name = 'LoggingManager'
elif 'session' in field_str:
specific_name = 'SessionManager'
elif 'queue' in field_str:
specific_name = 'QueueManager'
# Look at instantiated classes for more specific naming
if not specific_name and info['instantiated_classes']:
# Filter out common types
meaningful_classes = [c for c in info['instantiated_classes']
if c not in ['StringBuilder', 'StringBuffer', 'ArrayList', 'HashMap', 'HashSet',
'LinkedList', 'TreeMap', 'TreeSet', 'Vector', 'Hashtable', 'Exception',
'RuntimeException', 'IllegalArgumentException', 'Date', 'Calendar',
'String', 'Integer', 'Long', 'Double', 'Float', 'Boolean', 'Object']]
if meaningful_classes:
# Get most common instantiation
class_counts = {}
for cls in meaningful_classes:
class_counts[cls] = class_counts.get(cls, 0) + 1
most_common = max(class_counts.items(), key=lambda x: x[1])[0]
# Only use if it's not an exception being thrown
if not most_common.endswith('Exception'):
if class_counts[most_common] > 1:
specific_name = f"Multiple{most_common}Creator"
else:
specific_name = f"{most_common}Creator"
# Use string literals to understand domain context
if not specific_name and info['string_literals']:
# Look for domain-specific keywords in strings
combined_strings = ' '.join(info['string_literals']).lower()
domain_keywords = {
'error': 'ErrorHandler',
'warning': 'WarningHandler',
'log': 'LogHandler',
'file': 'FileHandler',
'connection': 'ConnectionHandler',
'socket': 'SocketHandler',
'serial': 'SerialHandler',
'port': 'PortHandler',
'device': 'DeviceHandler',
'sensor': 'SensorHandler',
'gauge': 'GaugeHandler',
'table': 'TableHandler',
'chart': 'ChartHandler',
'graph': 'GraphHandler',
'config': 'ConfigHandler',
'setting': 'SettingsHandler',
'parameter': 'ParameterHandler',
'tuning': 'TuningHandler',
'calibration': 'CalibrationHandler',
'diagnostic': 'DiagnosticHandler',
}
for keyword, handler_name in domain_keywords.items():
if keyword in combined_strings:
specific_name = handler_name
break
# Look for method name patterns (verbs + nouns)
if not specific_name and info['key_methods']:
# Find methods with meaningful names (>3 chars)
meaningful_methods = [m for m in info['key_methods'] if len(m) > 3]
if meaningful_methods:
# Look for common patterns
for method in meaningful_methods:
# handleXxx, processXxx, manageXxx patterns
for prefix in ['handle', 'process', 'manage', 'update', 'create', 'build']:
if method.lower().startswith(prefix) and len(method) > len(prefix) + 2:
noun = method[len(prefix):].capitalize()
specific_name = f"{noun}Handler"
break
if specific_name:
break
info['specific_name'] = specific_name
# Generate description based on context
desc_parts = []
if info['is_interface']:
desc_parts.append('Interface')
elif info['is_enum']:
desc_parts.append('Enum')
elif info['is_abstract']:
desc_parts.append('Abstract')
# Add pattern hints
if any(m in ['getInstance', 'create', 'newInstance'] for m in info['key_methods']):
desc_parts.append('Factory')
elif any('Listener' in i or 'Observer' in i for i in info['implements']):
desc_parts.append('Listener')
elif 'Exception' in str(info['extends']) or 'Error' in str(info['extends']):
desc_parts.append('Exception')
elif any(m.startswith('render') or m == 'paint' for m in info['key_methods']):
desc_parts.append('Renderer')
elif any(m == 'parse' or 'parse' in m.lower() for m in info['key_methods']):
desc_parts.append('Parser')
elif any(m in ['save', 'load', 'persist', 'fetch'] for m in info['key_methods']):
desc_parts.append('Repository')
elif any(m in ['start', 'stop', 'execute', 'run'] for m in info['key_methods']):
desc_parts.append('Service')
elif any(m in ['validate', 'check', 'isValid'] for m in info['key_methods']):
desc_parts.append('Validator')
elif len(info['key_methods']) >= 4:
desc_parts.append('Manager')
info['description'] = ''.join(desc_parts) if desc_parts else None
return info
def generate_creative_name(class_info, pkg_name, fallback_letter, used_names):
"""Generate a creative, unique filename using deep semantic analysis"""
# Helper to build context-rich names
def build_contextual_name(base_name):
"""Add context to base name to make it unique"""
# Sanitize base name: spell out numbers
number_words = {'0': 'Zero', '1': 'One', '2': 'Two', '3': 'Three', '4': 'Four',
'5': 'Five', '6': 'Six', '7': 'Seven', '8': 'Eight', '9': 'Nine'}
sanitized_base = base_name
for digit, word in number_words.items():
sanitized_base = sanitized_base.replace(digit, word)
# Also check if it's a Java keyword - if so, add descriptive suffix
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
'void', 'class', 'interface', 'Interface']
if sanitized_base.lower() in java_keywords:
sanitized_base = f"{sanitized_base}Type" # "boolean" → "booleanType"
if sanitized_base + ".java" not in used_names:
return sanitized_base + ".java"
return sanitized_base + ".java"
# Add field type context
if class_info.get('field_types'):
for ft in class_info['field_types'][:5]:
if ft.lower() not in sanitized_base.lower() and len(ft) > 2:
# Sanitize field type too
sanitized_ft = ft
for digit, word in number_words.items():
sanitized_ft = sanitized_ft.replace(digit, word)
name = f"{sanitized_base}With{sanitized_ft}"
if name + ".java" not in used_names:
return name + ".java"
# Add instantiated class context (avoid redundancy)
if class_info.get('instantiated_classes'):
for inst in class_info['instantiated_classes'][:5]:
if inst.lower() not in base_name.lower() and len(inst) > 2:
if not inst.endswith('Exception'):
name = f"{base_name}Creating{inst}"
if name + ".java" not in used_names:
return name + ".java"
# Add param type context
if class_info.get('param_types'):
for param in class_info['param_types'][:5]:
if param.lower() not in base_name.lower() and len(param) > 2:
name = f"{base_name}Using{param}"
if name + ".java" not in used_names:
return name + ".java"
# Add return type context
if class_info.get('return_types'):
for ret in class_info['return_types'][:5]:
if ret.lower() not in base_name.lower() and len(ret) > 2:
name = f"{base_name}Returning{ret}"
if name + ".java" not in used_names:
return name + ".java"
# Add method name context (skip getters/setters)
if class_info.get('key_methods'):
for method in class_info['key_methods'][:10]:
if len(method) > 3 and not method.startswith('get') and not method.startswith('set'):
if method.lower() not in base_name.lower():
name = f"{base_name}{method.capitalize()}"
if name + ".java" not in used_names:
return name + ".java"
# Add field name context
if class_info.get('field_names'):
for field in class_info['field_names'][:10]:
if len(field) > 3 and field.lower() not in base_name.lower():
name = f"{base_name}For{field.capitalize()}"
if name + ".java" not in used_names:
return name + ".java"
# Add package context (clean version)
pkg_clean = pkg_name.replace('_', '').capitalize()
name = f"{base_name}In{pkg_clean}Package"
if name + ".java" not in used_names:
return name + ".java"
# Last resort: combine multiple contexts
if class_info.get('field_types') and class_info.get('key_methods'):
ft = class_info['field_types'][0]
method = [m for m in class_info['key_methods'] if len(m) > 3][0] if [m for m in class_info['key_methods'] if len(m) > 3] else ''
if method:
name = f"{base_name}{method.capitalize()}{ft}"
if name + ".java" not in used_names:
return name + ".java"
return None
# Strategy 1: Use the specific inferred name with context
if class_info.get('specific_name') and len(class_info['specific_name']) > 2:
result = build_contextual_name(class_info['specific_name'])
if result:
return result
# Strategy 2: Use actual class name with context
if class_info['class_name'] and len(class_info['class_name']) > 2:
# Avoid Java keywords
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
'void', 'class', 'interface', 'extends', 'implements', 'package', 'import',
'public', 'private', 'protected', 'static', 'final', 'abstract']
if class_info['class_name'].lower() not in java_keywords:
result = build_contextual_name(class_info['class_name'])
if result:
return result
# Strategy 3: Use description with enriched context
if class_info['description']:
# Skip if description itself is a keyword
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
'void', 'class', 'interface', 'extends', 'implements', 'Interface']
if class_info['description'].lower() not in java_keywords:
result = build_contextual_name(class_info['description'])
if result:
return result
# Strategy 4: Build name from extends
if class_info.get('extends') and len(class_info['extends']) > 2:
base_name = f"{class_info['extends']}Extension"
result = build_contextual_name(base_name)
if result:
return result
# Strategy 5: Use implements interface with context
if class_info.get('implements'):
for impl in class_info['implements']:
impl_name = impl.split('.')[-1]
if len(impl_name) > 2:
result = build_contextual_name(f"{impl_name}Impl")
if result:
return result
# Strategy 6: Build name from multiple characteristics
name_parts = []
# Add technology context
if class_info.get('uses_network'):
name_parts.append('Network')
elif class_info.get('uses_io'):
name_parts.append('IO')
elif class_info.get('uses_thread'):
name_parts.append('Threaded')
elif class_info.get('uses_swing'):
name_parts.append('UI')
# Add primary field type
if class_info.get('field_types'):
ft = class_info['field_types'][0]
if len(ft) > 2 and ft not in ['String', 'Object', 'List', 'Map']:
name_parts.append(ft)
# Add primary method
if class_info.get('key_methods'):
for method in class_info['key_methods'][:5]:
if len(method) > 3 and not method.startswith('get') and not method.startswith('set'):
name_parts.append(method.capitalize())
break
if len(name_parts) > 0:
base_name = ''.join(name_parts)
result = build_contextual_name(base_name)
if result:
return result
# Strategy 7: Package-based comprehensive name
pkg_clean = pkg_name.replace('_', '').capitalize()
base_name = f"{pkg_clean}Component"
result = build_contextual_name(base_name)
if result:
return result
# Final fallback: Combine everything for guaranteed uniqueness
# NO single letters, NO numbers - just descriptive concatenation
parts = [pkg_clean]
if class_info.get('description'):
parts.append(class_info['description'])
elif class_info.get('is_interface'):
parts.append('InterfaceImpl')
elif class_info.get('is_abstract'):
parts.append('AbstractImpl')
else:
parts.append('ComponentImpl')
if class_info.get('field_types'):
ft = class_info['field_types'][0]
if len(ft) > 2:
parts.append('Managing' + ft)
elif class_info.get('param_types'):
pt = class_info['param_types'][0]
if len(pt) > 2:
parts.append('Processing' + pt)
if class_info.get('key_methods'):
for m in class_info['key_methods']:
if len(m) > 3 and not m.startswith('get') and not m.startswith('set'):
parts.append('Via' + m.capitalize())
break
# Use original filename letter as distinguisher (spelled out)
letter_names = {
'a': 'Alpha', 'b': 'Beta', 'c': 'Charlie', 'd': 'Delta', 'e': 'Echo',
'f': 'Foxtrot', 'g': 'Golf', 'h': 'Hotel', 'i': 'India', 'j': 'Juliet',
'k': 'Kilo', 'l': 'Lima', 'm': 'Mike', 'n': 'November', 'o': 'Oscar',
'p': 'Papa', 'q': 'Quebec', 'r': 'Romeo', 's': 'Sierra', 't': 'Tango',
'u': 'Uniform', 'v': 'Victor', 'w': 'Whiskey', 'x': 'Xray', 'y': 'Yankee',
'z': 'Zulu'
}
letter_key = fallback_letter.lower()
if letter_key in letter_names:
parts.append(letter_names[letter_key])
else:
# For multi-char filenames, use them directly
parts.append(fallback_letter.capitalize() + 'Variant')
final_name = ''.join(parts) + '.java'
# If still duplicate, try adding more context systematically
if final_name in used_names:
# Add imported classes
if class_info.get('imported_classes'):
for imp in class_info['imported_classes'][:3]:
test_name = ''.join(parts) + 'Imports' + imp + '.java'
if test_name not in used_names:
return test_name
# Add string literal hints
if class_info.get('string_literals'):
for lit in class_info['string_literals'][:2]:
# Clean the string for use in filename
clean_lit = ''.join(c.capitalize() if c.isalnum() else '' for c in lit)
if len(clean_lit) > 3:
test_name = ''.join(parts) + 'Ref' + clean_lit[:15] + '.java'
if test_name not in used_names:
return test_name
# Add extends/implements info
if class_info.get('extends'):
test_name = ''.join(parts) + 'Extends' + class_info['extends'] + '.java'
if test_name not in used_names:
return test_name
# Ultimate fallback: use phonetic alphabet with package combo
combo = pkg_clean + letter_names.get(letter_key, fallback_letter.capitalize())
test_name = ''.join(parts) + 'Unique' + combo + '.java'
if test_name not in used_names:
return test_name
# If STILL duplicate (nearly impossible), add more parts
extra_parts = []
if class_info.get('uses_network'):
extra_parts.append('Network')
if class_info.get('uses_io'):
extra_parts.append('InputOutput')
if class_info.get('uses_thread'):
extra_parts.append('Threading')
if class_info.get('uses_swing'):
extra_parts.append('SwingUI')
if extra_parts:
return ''.join(parts) + ''.join(extra_parts) + combo + '.java'
else:
# Very last resort: full package name spelling
return ''.join(parts) + 'From' + pkg_name.replace('_', '').capitalize() + combo + '.java'
return final_name
def generate_smart_mapping_v2():
"""Generate improved mapping with conflict resolution"""
base_path = "/home/rewrich/Documents/GitHub/tustu/app/obfuscated_packages"
mapping = {}
analysis_details = {}
stats = {
'mapped': 0,
'skipped_renamed': 0,
'skipped_empty': 0,
'used_class_name': 0,
'used_description': 0,
'used_fallback': 0
}
# Package name mappings
package_renames = {
"a": "linear_algebra",
"A": "ui_colors",
"B": "bluetooth_comm",
"C": "can_interface",
"D": "display_controller",
"E": "engine_params",
"F": "fuel_system",
"G": "core_events",
"H": "hardware_monitor",
"I": "runtime_metrics",
"J": "job_scheduler",
"K": "key_manager",
"L": "signal_processing",
"M": "filters",
"N": "network_manager",
"O": "output_handlers",
"P": "protocol_handlers",
"Q": "query_engine",
"R": "renderer_core",
"S": "service_manager",
"T": "thread_manager",
"U": "util_common",
"V": "vehicle_control",
"W": "widget_base",
"X": "extension_system",
"Y": "yaml_parser",
"Z": "zip_utils",
"aa": "math_utils",
"aA": "sensors",
"ab": "sensor_calibration",
"aB": "gauge_models",
"ac": "table_editor",
"aC": "data_serialization",
"ad": "file_io",
"aD": "logging",
"ae": "scripting_engine",
"aE": "crypto",
"af": "ui_dialogs",
"aF": "data_listener",
"ag": "network_sockets",
"aG": "data_formatter",
"ah": "ui_themes",
"aH": "config_parser",
"ai": "ui_components",
"aI": "sql_queries",
"aj": "vehicle_params",
"aJ": "theme_colors",
"ak": "ui_layout",
"aK": "window_manager",
"al": "mouse_keyboard",
"aL": "event_handlers",
"am": "ui_rendering",
"aM": "animation",
"an": "ui_layout_helpers",
"aN": "dimension_utils",
"aO": "text_rendering",
"aP": "tuning_maps",
"aq": "value_conversion",
"aQ": "permission_checker",
"ar": "error_handler",
"aR": "diagnostic_view",
"as": "report_generator",
"aS": "data_validation",
"at": "timer_utils",
"aT": "resource_loader",
}
# Track used names GLOBALLY for complete uniqueness
global_used_names = set()
used_names_per_pkg = defaultdict(set)
# Walk through all packages
for pkg_dir in sorted(os.listdir(base_path)):
old_pkg_path = os.path.join(base_path, pkg_dir)
if not os.path.isdir(old_pkg_path):
continue
new_pkg = package_renames.get(pkg_dir, f"{pkg_dir}_refactored")
# First pass: record all already-existing non-obfuscated files
for file in os.listdir(old_pkg_path):
if file.endswith('.java'):
basename = file.replace('.java', '')
# Already renamed files
if len(basename) > 2 or (len(basename) > 1 and basename[0].isupper()):
used_names_per_pkg[new_pkg].add(file)
# Second pass: analyze and map obfuscated files
for file in sorted(os.listdir(old_pkg_path)):
if not file.endswith('.java'):
continue
filepath = os.path.join(old_pkg_path, file)
basename = file.replace('.java', '')
# Skip already renamed files
if len(basename) > 2 or (len(basename) > 1 and basename[0].isupper()):
stats['skipped_renamed'] += 1
continue
# Skip special completed cases
if pkg_dir == "a" and file in ["a.java", "b.java", "c.java"]:
stats['skipped_renamed'] += 1
continue
try:
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
if not content.strip():
stats['skipped_empty'] += 1
continue
# Extract class information
class_info = extract_class_info(filepath, content)
# Generate creative name with GLOBAL uniqueness check
new_file = generate_creative_name(
class_info,
pkg_dir,
basename,
global_used_names # Changed from per-package to global
)
# Track which strategy was used
if class_info['class_name'] and class_info['class_name'] in new_file:
stats['used_class_name'] += 1
elif class_info['description'] and class_info['description'] in new_file:
stats['used_description'] += 1
else:
stats['used_fallback'] += 1
# Add to BOTH global and per-package tracking
global_used_names.add(new_file)
used_names_per_pkg[new_pkg].add(new_file)
old_full = f"app/obfuscated_packages/{pkg_dir}/{file}"
new_full = f"app/obfuscated_packages/{new_pkg}/{new_file}"
mapping[old_full] = new_full
analysis_details[f"{pkg_dir}/{file}"] = {
'new_name': new_file,
'class_info': class_info
}
stats['mapped'] += 1
if stats['mapped'] % 500 == 0:
print(f"Processed {stats['mapped']} files...")
except Exception as e:
continue
# Save mapping
mapping_file = "/home/rewrich/Documents/GitHub/tustu/PACKAGE_MAPPING_SMART.json"
with open(mapping_file, 'w') as f:
json.dump(mapping, f, indent=2, sort_keys=True)
# Check for remaining duplicates (should be zero with new strategy)
print("\nVerifying global uniqueness...")
global_name_count = {}
for old_path, new_path in mapping.items():
new_name = new_path.split('/')[-1]
if new_name not in global_name_count:
global_name_count[new_name] = []
global_name_count[new_name].append((old_path, new_path))
duplicates = [name for name, paths in global_name_count.items() if len(paths) > 1]
if duplicates:
print(f"⚠️ Found {len(duplicates)} duplicate names (should not happen with new strategy)")
for name in duplicates[:5]:
print(f" - {name}: {len(global_name_count[name])} instances")
else:
print("✅ All filenames are unique!")
print(f"\n{'='*60}")
print(f"Generated smart mapping: {mapping_file}")
print(f"{'='*60}")
print(f"Total files mapped: {stats['mapped']}")
print(f"Total unique filenames: {len(global_name_count)}")
print(f"Used actual class names: {stats['used_class_name']}")
print(f"Used pattern descriptions: {stats['used_description']}")
print(f"Used fallback names: {stats['used_fallback']}")
print(f"Skipped (already renamed): {stats['skipped_renamed']}")
print(f"Skipped (empty): {stats['skipped_empty']}")
# Save analysis details
analysis_file = "/home/rewrich/Documents/GitHub/tustu/JAVA_ANALYSIS_DETAILED.json"
with open(analysis_file, 'w') as f:
json.dump(analysis_details, f, indent=2, default=str)
print(f"\nAnalysis details saved to: {analysis_file}")
return mapping
if __name__ == '__main__':
generate_smart_mapping_v2()