mirror of
https://github.com/johndoe6345789/tustu.git
synced 2026-04-24 13:45:00 +00:00
feat: add complete solution for renaming obfuscated Java files
- Implemented semantic name generation for obfuscated Java classes - Analyzed import dependencies to track class references - Created batch execution logic for renaming files and updating imports - Added comprehensive error handling and logging throughout the process - Generated a complete mapping of renamed files and their dependencies
This commit is contained in:
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,168 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Analyze all imports in Java files to map dependencies.
|
||||
This will help us update imports when we rename obfuscated files.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
def extract_imports(filepath):
|
||||
"""Extract all import statements from a Java file"""
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
# Find all import statements
|
||||
imports = re.findall(r'import\s+([\w.]+);', content)
|
||||
return imports
|
||||
except Exception as e:
|
||||
return []
|
||||
|
||||
def analyze_all_imports(base_dir):
|
||||
"""Analyze all imports across the codebase"""
|
||||
print("Analyzing imports across all Java files...")
|
||||
|
||||
# Load the rename mapping
|
||||
with open('PACKAGE_MAPPING_SMART.json', 'r') as f:
|
||||
rename_mapping = json.load(f)
|
||||
|
||||
# Build reverse mapping: old_path -> new_path
|
||||
old_to_new = {}
|
||||
for old_path, new_path in rename_mapping.items():
|
||||
old_to_new[old_path] = new_path
|
||||
|
||||
# Build package.ClassName -> filepath mapping for obfuscated files
|
||||
obfuscated_class_map = {}
|
||||
obfuscated_simple_map = {} # Just classname -> filepath for single-letter classes
|
||||
|
||||
for old_path in rename_mapping.keys():
|
||||
# Extract package and class name from path
|
||||
parts = old_path.split('/')
|
||||
if 'obfuscated_packages' in parts:
|
||||
# Read file to get actual package declaration and class name
|
||||
try:
|
||||
full_path = os.path.join(base_dir, old_path)
|
||||
with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
# Extract package statement
|
||||
pkg_match = re.search(r'package\s+([\w.]+);', content)
|
||||
# Extract class name (could be obfuscated single letter)
|
||||
class_match = re.search(r'(?:public\s+)?(?:class|interface|enum)\s+(\w+)', content)
|
||||
|
||||
if pkg_match and class_match:
|
||||
full_package = pkg_match.group(1)
|
||||
class_name = class_match.group(1)
|
||||
qualified_name = f"{full_package}.{class_name}"
|
||||
obfuscated_class_map[qualified_name] = {
|
||||
'old_path': old_path,
|
||||
'new_path': rename_mapping[old_path],
|
||||
'package': full_package,
|
||||
'old_classname': class_name
|
||||
}
|
||||
|
||||
# Also track by just classname for simple imports
|
||||
if class_name not in obfuscated_simple_map:
|
||||
obfuscated_simple_map[class_name] = []
|
||||
obfuscated_simple_map[class_name].append(qualified_name)
|
||||
except Exception as e:
|
||||
pass
|
||||
|
||||
# Now analyze imports in all Java files
|
||||
import_dependencies = {}
|
||||
files_to_update = {}
|
||||
|
||||
java_files = list(Path(base_dir).rglob('*.java'))
|
||||
print(f"Scanning {len(java_files)} Java files for imports...")
|
||||
|
||||
for idx, filepath in enumerate(java_files):
|
||||
if idx % 500 == 0 and idx > 0:
|
||||
print(f" Processed {idx} files...")
|
||||
|
||||
rel_path = str(filepath.relative_to(base_dir))
|
||||
imports = extract_imports(filepath)
|
||||
|
||||
if not imports:
|
||||
continue
|
||||
|
||||
# Check which imports reference obfuscated files
|
||||
obfuscated_imports = []
|
||||
for imp in imports:
|
||||
# Check fully qualified import
|
||||
if imp in obfuscated_class_map:
|
||||
obfuscated_imports.append({
|
||||
'import_statement': imp,
|
||||
'old_path': obfuscated_class_map[imp]['old_path'],
|
||||
'new_path': obfuscated_class_map[imp]['new_path'],
|
||||
'old_package': obfuscated_class_map[imp]['package'],
|
||||
'old_classname': obfuscated_class_map[imp]['old_classname']
|
||||
})
|
||||
else:
|
||||
# Check if just the classname matches (last part of import)
|
||||
class_part = imp.split('.')[-1]
|
||||
if class_part in obfuscated_simple_map:
|
||||
# Multiple matches possible, include all
|
||||
for qualified in obfuscated_simple_map[class_part]:
|
||||
if imp == qualified: # Exact match
|
||||
obfuscated_imports.append({
|
||||
'import_statement': imp,
|
||||
'old_path': obfuscated_class_map[qualified]['old_path'],
|
||||
'new_path': obfuscated_class_map[qualified]['new_path'],
|
||||
'old_package': obfuscated_class_map[qualified]['package'],
|
||||
'old_classname': obfuscated_class_map[qualified]['old_classname']
|
||||
})
|
||||
|
||||
if obfuscated_imports:
|
||||
import_dependencies[rel_path] = {
|
||||
'all_imports': imports,
|
||||
'obfuscated_imports': obfuscated_imports
|
||||
}
|
||||
files_to_update[rel_path] = obfuscated_imports
|
||||
|
||||
# Generate the comprehensive mapping
|
||||
result = {
|
||||
'rename_mapping': rename_mapping,
|
||||
'obfuscated_class_map': obfuscated_class_map,
|
||||
'import_dependencies': import_dependencies,
|
||||
'files_to_update': files_to_update,
|
||||
'statistics': {
|
||||
'total_files': len(rename_mapping),
|
||||
'obfuscated_classes_tracked': len(obfuscated_class_map),
|
||||
'files_with_imports': len(import_dependencies),
|
||||
'files_needing_updates': len(files_to_update)
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
|
||||
def main():
|
||||
base_dir = '/home/rewrich/Documents/GitHub/tustu'
|
||||
|
||||
result = analyze_all_imports(base_dir)
|
||||
|
||||
# Save the comprehensive mapping
|
||||
output_file = 'COMPLETE_RENAME_MAPPING.json'
|
||||
with open(output_file, 'w') as f:
|
||||
json.dump(result, f, indent=2)
|
||||
|
||||
print("\n" + "="*60)
|
||||
print(f"Complete mapping saved to: {output_file}")
|
||||
print("="*60)
|
||||
print(f"Total files to rename: {result['statistics']['total_files']}")
|
||||
print(f"Obfuscated classes tracked: {result['statistics']['obfuscated_classes_tracked']}")
|
||||
print(f"Files with imports: {result['statistics']['files_with_imports']}")
|
||||
print(f"Files needing import updates: {result['statistics']['files_needing_updates']}")
|
||||
|
||||
if result['statistics']['files_needing_updates'] > 0:
|
||||
print(f"\n⚠️ {result['statistics']['files_needing_updates']} files have imports that need updating")
|
||||
print("Sample files that need import updates:")
|
||||
for filepath in list(result['files_to_update'].keys())[:5]:
|
||||
print(f" - {filepath}")
|
||||
|
||||
print("\nReady for batch rename and import updates!")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -1,873 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Smart Java file renaming with conflict resolution and creative naming.
|
||||
Uses actual class names and context to generate unique, descriptive filenames.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
def extract_class_info(filepath, content):
|
||||
"""Extract class name and key characteristics from Java content"""
|
||||
info = {
|
||||
'class_name': None,
|
||||
'is_interface': False,
|
||||
'is_abstract': False,
|
||||
'is_enum': False,
|
||||
'extends': None,
|
||||
'implements': [],
|
||||
'key_methods': [],
|
||||
'field_types': [],
|
||||
'field_names': [],
|
||||
'string_literals': [],
|
||||
'imported_classes': [],
|
||||
'instantiated_classes': [],
|
||||
'description': None,
|
||||
'specific_name': None
|
||||
}
|
||||
|
||||
# Extract class/interface/enum declaration
|
||||
class_match = re.search(r'public\s+(?:(abstract|final)\s+)?(class|interface|enum)\s+(\w+)', content)
|
||||
if class_match:
|
||||
modifier, type_keyword, class_name = class_match.groups()
|
||||
info['class_name'] = class_name
|
||||
info['is_interface'] = (type_keyword == 'interface')
|
||||
info['is_enum'] = (type_keyword == 'enum')
|
||||
info['is_abstract'] = (modifier == 'abstract')
|
||||
|
||||
# Extract extends
|
||||
extends_match = re.search(r'extends\s+(\w+)', content)
|
||||
if extends_match:
|
||||
info['extends'] = extends_match.group(1)
|
||||
|
||||
# Extract implements
|
||||
implements_match = re.findall(r'implements\s+([\w,\s]+)', content)
|
||||
if implements_match:
|
||||
for impl in implements_match:
|
||||
info['implements'].extend([i.strip() for i in impl.split(',')])
|
||||
|
||||
# Extract ALL public methods (not just first 5)
|
||||
methods = re.findall(r'public\s+(?:static\s+)?(?:synchronized\s+)?\w+\s+(\w+)\s*\(', content)
|
||||
info['key_methods'] = methods
|
||||
|
||||
# Extract field types to understand what this class manages
|
||||
field_types = re.findall(r'(?:private|protected|public)\s+(?:static\s+)?(?:final\s+)?(\w+)\s+\w+\s*[;=]', content)
|
||||
# Filter out primitives and common types AND Java keywords
|
||||
java_primitives_and_keywords = ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object',
|
||||
'char', 'byte', 'short', 'void', 'class', 'interface', 'extends', 'implements']
|
||||
info['field_types'] = [f for f in field_types if len(f) > 2 and f not in java_primitives_and_keywords][:10]
|
||||
|
||||
# Extract field names for context
|
||||
field_names = re.findall(r'(?:private|protected|public)\s+(?:static\s+)?(?:final\s+)?\w+\s+(\w+)\s*[;=]', content)
|
||||
info['field_names'] = [f for f in field_names if len(f) > 1][:10]
|
||||
|
||||
# Extract string literals for domain understanding
|
||||
string_literals = re.findall(r'"([^"]{3,40})"', content) # 3-40 char strings
|
||||
info['string_literals'] = string_literals[:10]
|
||||
|
||||
# Extract imported classes (just the class name, not full package)
|
||||
imports = re.findall(r'import\s+[\w.]+\.(\w+);', content)
|
||||
# Filter out classes with numbers (like Base64, CRC32, etc.)
|
||||
info['imported_classes'] = [i for i in imports if len(i) > 2 and not any(c.isdigit() for c in i)][:15]
|
||||
|
||||
# Extract instantiated classes (new ClassName())
|
||||
instantiations = re.findall(r'new\s+(\w+)\s*\(', content)
|
||||
# Filter out primitives, wrappers, and classes with numbers
|
||||
info['instantiated_classes'] = [i for i in instantiations
|
||||
if len(i) > 2
|
||||
and i not in ['int', 'Integer', 'long', 'Long',
|
||||
'boolean', 'Boolean', 'double', 'Double',
|
||||
'float', 'Float', 'byte', 'Byte',
|
||||
'short', 'Short', 'char', 'Character']
|
||||
and not any(c.isdigit() for c in i)][:15]
|
||||
|
||||
# Extract method parameter types for more context
|
||||
method_params = re.findall(r'\w+\s+\w+\s*\(([^)]*)\)', content)
|
||||
param_types = []
|
||||
java_primitives_and_keywords = ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object',
|
||||
'char', 'byte', 'short', 'void', 'class', 'interface']
|
||||
for params in method_params:
|
||||
if params.strip():
|
||||
types = re.findall(r'(\w+)\s+\w+', params)
|
||||
param_types.extend([t for t in types if len(t) > 2 and t not in java_primitives_and_keywords])
|
||||
info['param_types'] = list(set(param_types))[:10]
|
||||
|
||||
# Extract method return types
|
||||
return_types = re.findall(r'(?:public|private|protected)\s+(?:static\s+)?(\w+)\s+\w+\s*\(', content)
|
||||
java_primitives_and_keywords = ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object',
|
||||
'char', 'byte', 'short', 'void', 'class', 'interface']
|
||||
info['return_types'] = [r for r in return_types if len(r) > 2 and r not in java_primitives_and_keywords][:10]
|
||||
|
||||
# Look for specific patterns in method bodies
|
||||
info['uses_thread'] = 'Thread' in content or 'Runnable' in content or 'ExecutorService' in content
|
||||
info['uses_io'] = 'InputStream' in content or 'OutputStream' in content or 'Reader' in content or 'Writer' in content
|
||||
info['uses_network'] = 'Socket' in content or 'ServerSocket' in content or 'URL' in content or 'HttpURLConnection' in content
|
||||
info['uses_swing'] = 'JFrame' in content or 'JPanel' in content or 'JButton' in content or 'JTable' in content
|
||||
info['uses_collection'] = 'List' in content or 'Map' in content or 'Set' in content
|
||||
info['uses_serialization'] = 'Serializable' in content or 'ObjectInputStream' in content or 'ObjectOutputStream' in content
|
||||
|
||||
# Try to infer specific name from content
|
||||
specific_name = None
|
||||
|
||||
# Special handling for ActionListener - look at body content first
|
||||
if 'ActionListener' in info['implements']:
|
||||
# Extract more context from the code body
|
||||
content_lower = content.lower()
|
||||
|
||||
# Look for method calls to understand the action
|
||||
method_calls = re.findall(r'\.(\w+)\(', content)
|
||||
meaningful_calls = [m for m in method_calls if len(m) > 3 and m not in ['this', 'super', 'toString', 'equals', 'hashCode', 'getClass']]
|
||||
|
||||
# Look for field assignments to understand what's being modified
|
||||
field_assignments = re.findall(r'this\.(\w+)\s*=', content)
|
||||
meaningful_fields = [f for f in field_assignments if len(f) > 1]
|
||||
|
||||
# Look for specific patterns with more context
|
||||
if '.dispose()' in content:
|
||||
# Check what's being disposed
|
||||
if 'dialog' in content_lower:
|
||||
if 'cancel' in content_lower or 'close' in content_lower:
|
||||
specific_name = 'CancelDialogAndCloseListener'
|
||||
else:
|
||||
specific_name = 'DisposeDialogListener'
|
||||
elif 'window' in content_lower or 'frame' in content_lower:
|
||||
specific_name = 'CloseWindowListener'
|
||||
else:
|
||||
specific_name = 'DisposeListener'
|
||||
|
||||
# RadioButton with specific context
|
||||
elif 'JRadioButton' in content:
|
||||
if 'rating' in content_lower or 'feedback' in content_lower:
|
||||
specific_name = 'RatingSelectionRadioButtonListener'
|
||||
elif 'parse' in content_lower or 'Integer.parseInt' in content:
|
||||
specific_name = 'ParseIntegerFromRadioButtonListener'
|
||||
elif meaningful_fields:
|
||||
specific_name = f"Update{meaningful_fields[0].capitalize()}FromRadioButtonListener"
|
||||
else:
|
||||
specific_name = 'RadioButtonSelectionListener'
|
||||
|
||||
# CheckBox with specific context
|
||||
elif 'JCheckBox' in content:
|
||||
if 'enable' in content_lower or 'disable' in content_lower:
|
||||
specific_name = 'ToggleEnableStateCheckBoxListener'
|
||||
elif 'visible' in content_lower or 'visibility' in content_lower:
|
||||
specific_name = 'ToggleVisibilityCheckBoxListener'
|
||||
elif meaningful_fields:
|
||||
specific_name = f"Update{meaningful_fields[0].capitalize()}CheckBoxListener"
|
||||
else:
|
||||
specific_name = 'CheckBoxStateChangeListener'
|
||||
|
||||
# MenuItem with context
|
||||
elif 'JMenuItem' in content or 'MenuItem' in content:
|
||||
if 'open' in content_lower:
|
||||
specific_name = 'OpenMenuItemListener'
|
||||
elif 'save' in content_lower:
|
||||
specific_name = 'SaveMenuItemListener'
|
||||
elif 'exit' in content_lower or 'quit' in content_lower:
|
||||
specific_name = 'ExitMenuItemListener'
|
||||
else:
|
||||
specific_name = 'MenuItemActionListener'
|
||||
|
||||
# Dialog patterns with more specificity
|
||||
elif ('JDialog' in content or 'Dialog' in content) and content.count('Dialog') > 1:
|
||||
if 'submit' in content_lower or 'ok' in content_lower:
|
||||
if 'validation' in content_lower or 'validate' in content_lower:
|
||||
specific_name = 'ValidateAndSubmitDialogListener'
|
||||
else:
|
||||
specific_name = 'SubmitDialogListener'
|
||||
elif 'cancel' in content_lower:
|
||||
specific_name = 'CancelDialogListener'
|
||||
elif 'apply' in content_lower:
|
||||
specific_name = 'ApplyDialogChangesListener'
|
||||
else:
|
||||
specific_name = 'DialogActionListener'
|
||||
|
||||
# Frame actions
|
||||
elif ('JFrame' in content or 'Frame' in content) and content.count('Frame') > 1:
|
||||
if 'minimize' in content_lower:
|
||||
specific_name = 'MinimizeFrameListener'
|
||||
elif 'maximize' in content_lower:
|
||||
specific_name = 'MaximizeFrameListener'
|
||||
else:
|
||||
specific_name = 'FrameActionListener'
|
||||
|
||||
# Button actions
|
||||
elif ('JButton' in content or 'Button' in content) and content.count('Button') > 2:
|
||||
if 'browse' in content_lower:
|
||||
specific_name = 'BrowseButtonClickListener'
|
||||
elif 'search' in content_lower:
|
||||
specific_name = 'SearchButtonClickListener'
|
||||
else:
|
||||
specific_name = 'ButtonClickActionListener'
|
||||
|
||||
# Look for method calls that indicate purpose (more specific)
|
||||
if not specific_name:
|
||||
if 'setEnabled' in meaningful_calls:
|
||||
specific_name = 'ToggleComponentEnabledStateListener'
|
||||
elif 'setVisible' in meaningful_calls:
|
||||
specific_name = 'ToggleComponentVisibilityListener'
|
||||
elif 'setText' in meaningful_calls:
|
||||
specific_name = 'UpdateTextFieldListener'
|
||||
elif 'setSelected' in meaningful_calls:
|
||||
specific_name = 'UpdateSelectionStateListener'
|
||||
elif any(m.startswith('show') for m in meaningful_calls):
|
||||
show_methods = [m for m in meaningful_calls if m.startswith('show')]
|
||||
specific_name = f"{show_methods[0].capitalize()}Listener"
|
||||
elif any(m.startswith('open') for m in meaningful_calls):
|
||||
open_methods = [m for m in meaningful_calls if m.startswith('open')]
|
||||
specific_name = f"{open_methods[0].capitalize()}Listener"
|
||||
elif any(m.startswith('start') for m in meaningful_calls):
|
||||
start_methods = [m for m in meaningful_calls if m.startswith('start')]
|
||||
specific_name = f"{start_methods[0].capitalize()}Listener"
|
||||
elif meaningful_calls and len(meaningful_calls) > 0:
|
||||
# Use the most prominent method call
|
||||
specific_name = f"{meaningful_calls[0].capitalize()}ActionListener"
|
||||
|
||||
# Fallback to generic ActionListener
|
||||
if not specific_name:
|
||||
specific_name = 'GenericActionListener'
|
||||
|
||||
# Check for OTHER listener/callback patterns with specific event types (not ActionListener)
|
||||
if not specific_name:
|
||||
for impl in info['implements']:
|
||||
if impl != 'ActionListener' and ('Listener' in impl or 'Callback' in impl):
|
||||
# Extract the specific type (e.g., "DeviceUpdate" from "DeviceUpdateListener")
|
||||
specific = impl.replace('Listener', '').replace('Callback', '').replace('Handler', '')
|
||||
if len(specific) > 2:
|
||||
specific_name = f"{specific}Listener"
|
||||
break
|
||||
|
||||
# Look for dominant field types (what this class manages)
|
||||
if not specific_name and info['field_types']:
|
||||
type_counts = {}
|
||||
for ft in info['field_types']:
|
||||
if ft not in ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object']:
|
||||
type_counts[ft] = type_counts.get(ft, 0) + 1
|
||||
if type_counts:
|
||||
dominant_type = max(type_counts.items(), key=lambda x: x[1])[0]
|
||||
if len(dominant_type) > 2:
|
||||
# Add technology context
|
||||
if info.get('uses_network') and 'Socket' not in dominant_type:
|
||||
specific_name = f"Network{dominant_type}Manager"
|
||||
elif info.get('uses_io') and 'File' in content:
|
||||
specific_name = f"File{dominant_type}Manager"
|
||||
elif info.get('uses_thread'):
|
||||
specific_name = f"Threaded{dominant_type}Manager"
|
||||
elif type_counts[dominant_type] > 1:
|
||||
specific_name = f"Multiple{dominant_type}Manager"
|
||||
else:
|
||||
specific_name = f"{dominant_type}Manager"
|
||||
|
||||
# Analyze field names for semantic meaning
|
||||
if not specific_name and info['field_names']:
|
||||
meaningful_fields = [f for f in info['field_names'] if len(f) > 3 and f not in ['this', 'that', 'temp', 'tmp', 'value', 'data']]
|
||||
if meaningful_fields:
|
||||
# Look for domain-specific field patterns
|
||||
field_str = ' '.join(meaningful_fields).lower()
|
||||
if 'connection' in field_str or 'socket' in field_str:
|
||||
specific_name = 'ConnectionManager'
|
||||
elif 'cache' in field_str or 'buffer' in field_str:
|
||||
specific_name = 'CacheManager'
|
||||
elif 'config' in field_str or 'setting' in field_str:
|
||||
specific_name = 'ConfigurationManager'
|
||||
elif 'logger' in field_str or 'logging' in field_str:
|
||||
specific_name = 'LoggingManager'
|
||||
elif 'session' in field_str:
|
||||
specific_name = 'SessionManager'
|
||||
elif 'queue' in field_str:
|
||||
specific_name = 'QueueManager'
|
||||
|
||||
# Look at instantiated classes for more specific naming
|
||||
if not specific_name and info['instantiated_classes']:
|
||||
# Filter out common types
|
||||
meaningful_classes = [c for c in info['instantiated_classes']
|
||||
if c not in ['StringBuilder', 'StringBuffer', 'ArrayList', 'HashMap', 'HashSet',
|
||||
'LinkedList', 'TreeMap', 'TreeSet', 'Vector', 'Hashtable', 'Exception',
|
||||
'RuntimeException', 'IllegalArgumentException', 'Date', 'Calendar',
|
||||
'String', 'Integer', 'Long', 'Double', 'Float', 'Boolean', 'Object']]
|
||||
if meaningful_classes:
|
||||
# Get most common instantiation
|
||||
class_counts = {}
|
||||
for cls in meaningful_classes:
|
||||
class_counts[cls] = class_counts.get(cls, 0) + 1
|
||||
most_common = max(class_counts.items(), key=lambda x: x[1])[0]
|
||||
# Only use if it's not an exception being thrown
|
||||
if not most_common.endswith('Exception'):
|
||||
if class_counts[most_common] > 1:
|
||||
specific_name = f"Multiple{most_common}Creator"
|
||||
else:
|
||||
specific_name = f"{most_common}Creator"
|
||||
|
||||
# Use string literals to understand domain context
|
||||
if not specific_name and info['string_literals']:
|
||||
# Look for domain-specific keywords in strings
|
||||
combined_strings = ' '.join(info['string_literals']).lower()
|
||||
domain_keywords = {
|
||||
'error': 'ErrorHandler',
|
||||
'warning': 'WarningHandler',
|
||||
'log': 'LogHandler',
|
||||
'file': 'FileHandler',
|
||||
'connection': 'ConnectionHandler',
|
||||
'socket': 'SocketHandler',
|
||||
'serial': 'SerialHandler',
|
||||
'port': 'PortHandler',
|
||||
'device': 'DeviceHandler',
|
||||
'sensor': 'SensorHandler',
|
||||
'gauge': 'GaugeHandler',
|
||||
'table': 'TableHandler',
|
||||
'chart': 'ChartHandler',
|
||||
'graph': 'GraphHandler',
|
||||
'config': 'ConfigHandler',
|
||||
'setting': 'SettingsHandler',
|
||||
'parameter': 'ParameterHandler',
|
||||
'tuning': 'TuningHandler',
|
||||
'calibration': 'CalibrationHandler',
|
||||
'diagnostic': 'DiagnosticHandler',
|
||||
}
|
||||
for keyword, handler_name in domain_keywords.items():
|
||||
if keyword in combined_strings:
|
||||
specific_name = handler_name
|
||||
break
|
||||
|
||||
# Look for method name patterns (verbs + nouns)
|
||||
if not specific_name and info['key_methods']:
|
||||
# Find methods with meaningful names (>3 chars)
|
||||
meaningful_methods = [m for m in info['key_methods'] if len(m) > 3]
|
||||
if meaningful_methods:
|
||||
# Look for common patterns
|
||||
for method in meaningful_methods:
|
||||
# handleXxx, processXxx, manageXxx patterns
|
||||
for prefix in ['handle', 'process', 'manage', 'update', 'create', 'build']:
|
||||
if method.lower().startswith(prefix) and len(method) > len(prefix) + 2:
|
||||
noun = method[len(prefix):].capitalize()
|
||||
specific_name = f"{noun}Handler"
|
||||
break
|
||||
if specific_name:
|
||||
break
|
||||
|
||||
info['specific_name'] = specific_name
|
||||
|
||||
# Generate description based on context
|
||||
desc_parts = []
|
||||
|
||||
if info['is_interface']:
|
||||
desc_parts.append('Interface')
|
||||
elif info['is_enum']:
|
||||
desc_parts.append('Enum')
|
||||
elif info['is_abstract']:
|
||||
desc_parts.append('Abstract')
|
||||
|
||||
# Add pattern hints
|
||||
if any(m in ['getInstance', 'create', 'newInstance'] for m in info['key_methods']):
|
||||
desc_parts.append('Factory')
|
||||
elif any('Listener' in i or 'Observer' in i for i in info['implements']):
|
||||
desc_parts.append('Listener')
|
||||
elif 'Exception' in str(info['extends']) or 'Error' in str(info['extends']):
|
||||
desc_parts.append('Exception')
|
||||
elif any(m.startswith('render') or m == 'paint' for m in info['key_methods']):
|
||||
desc_parts.append('Renderer')
|
||||
elif any(m == 'parse' or 'parse' in m.lower() for m in info['key_methods']):
|
||||
desc_parts.append('Parser')
|
||||
elif any(m in ['save', 'load', 'persist', 'fetch'] for m in info['key_methods']):
|
||||
desc_parts.append('Repository')
|
||||
elif any(m in ['start', 'stop', 'execute', 'run'] for m in info['key_methods']):
|
||||
desc_parts.append('Service')
|
||||
elif any(m in ['validate', 'check', 'isValid'] for m in info['key_methods']):
|
||||
desc_parts.append('Validator')
|
||||
elif len(info['key_methods']) >= 4:
|
||||
desc_parts.append('Manager')
|
||||
|
||||
info['description'] = ''.join(desc_parts) if desc_parts else None
|
||||
|
||||
return info
|
||||
|
||||
def generate_creative_name(class_info, pkg_name, fallback_letter, used_names):
|
||||
"""Generate a creative, unique filename using deep semantic analysis"""
|
||||
|
||||
# Helper to build context-rich names
|
||||
def build_contextual_name(base_name):
|
||||
"""Add context to base name to make it unique"""
|
||||
# Sanitize base name: spell out numbers
|
||||
number_words = {'0': 'Zero', '1': 'One', '2': 'Two', '3': 'Three', '4': 'Four',
|
||||
'5': 'Five', '6': 'Six', '7': 'Seven', '8': 'Eight', '9': 'Nine'}
|
||||
sanitized_base = base_name
|
||||
for digit, word in number_words.items():
|
||||
sanitized_base = sanitized_base.replace(digit, word)
|
||||
|
||||
# Also check if it's a Java keyword - if so, add descriptive suffix
|
||||
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
|
||||
'void', 'class', 'interface', 'Interface']
|
||||
if sanitized_base.lower() in java_keywords:
|
||||
sanitized_base = f"{sanitized_base}Type" # "boolean" → "booleanType"
|
||||
|
||||
if sanitized_base + ".java" not in used_names:
|
||||
return sanitized_base + ".java"
|
||||
return sanitized_base + ".java"
|
||||
|
||||
# Add field type context
|
||||
if class_info.get('field_types'):
|
||||
for ft in class_info['field_types'][:5]:
|
||||
if ft.lower() not in sanitized_base.lower() and len(ft) > 2:
|
||||
# Sanitize field type too
|
||||
sanitized_ft = ft
|
||||
for digit, word in number_words.items():
|
||||
sanitized_ft = sanitized_ft.replace(digit, word)
|
||||
name = f"{sanitized_base}With{sanitized_ft}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Add instantiated class context (avoid redundancy)
|
||||
if class_info.get('instantiated_classes'):
|
||||
for inst in class_info['instantiated_classes'][:5]:
|
||||
if inst.lower() not in base_name.lower() and len(inst) > 2:
|
||||
if not inst.endswith('Exception'):
|
||||
name = f"{base_name}Creating{inst}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Add param type context
|
||||
if class_info.get('param_types'):
|
||||
for param in class_info['param_types'][:5]:
|
||||
if param.lower() not in base_name.lower() and len(param) > 2:
|
||||
name = f"{base_name}Using{param}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Add return type context
|
||||
if class_info.get('return_types'):
|
||||
for ret in class_info['return_types'][:5]:
|
||||
if ret.lower() not in base_name.lower() and len(ret) > 2:
|
||||
name = f"{base_name}Returning{ret}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Add method name context (skip getters/setters)
|
||||
if class_info.get('key_methods'):
|
||||
for method in class_info['key_methods'][:10]:
|
||||
if len(method) > 3 and not method.startswith('get') and not method.startswith('set'):
|
||||
if method.lower() not in base_name.lower():
|
||||
name = f"{base_name}{method.capitalize()}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Add field name context
|
||||
if class_info.get('field_names'):
|
||||
for field in class_info['field_names'][:10]:
|
||||
if len(field) > 3 and field.lower() not in base_name.lower():
|
||||
name = f"{base_name}For{field.capitalize()}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Add package context (clean version)
|
||||
pkg_clean = pkg_name.replace('_', '').capitalize()
|
||||
name = f"{base_name}In{pkg_clean}Package"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Last resort: combine multiple contexts
|
||||
if class_info.get('field_types') and class_info.get('key_methods'):
|
||||
ft = class_info['field_types'][0]
|
||||
method = [m for m in class_info['key_methods'] if len(m) > 3][0] if [m for m in class_info['key_methods'] if len(m) > 3] else ''
|
||||
if method:
|
||||
name = f"{base_name}{method.capitalize()}{ft}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
return None
|
||||
|
||||
# Strategy 1: Use the specific inferred name with context
|
||||
if class_info.get('specific_name') and len(class_info['specific_name']) > 2:
|
||||
result = build_contextual_name(class_info['specific_name'])
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 2: Use actual class name with context
|
||||
if class_info['class_name'] and len(class_info['class_name']) > 2:
|
||||
# Avoid Java keywords
|
||||
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
|
||||
'void', 'class', 'interface', 'extends', 'implements', 'package', 'import',
|
||||
'public', 'private', 'protected', 'static', 'final', 'abstract']
|
||||
if class_info['class_name'].lower() not in java_keywords:
|
||||
result = build_contextual_name(class_info['class_name'])
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 3: Use description with enriched context
|
||||
if class_info['description']:
|
||||
# Skip if description itself is a keyword
|
||||
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
|
||||
'void', 'class', 'interface', 'extends', 'implements', 'Interface']
|
||||
if class_info['description'].lower() not in java_keywords:
|
||||
result = build_contextual_name(class_info['description'])
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 4: Build name from extends
|
||||
if class_info.get('extends') and len(class_info['extends']) > 2:
|
||||
base_name = f"{class_info['extends']}Extension"
|
||||
result = build_contextual_name(base_name)
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 5: Use implements interface with context
|
||||
if class_info.get('implements'):
|
||||
for impl in class_info['implements']:
|
||||
impl_name = impl.split('.')[-1]
|
||||
if len(impl_name) > 2:
|
||||
result = build_contextual_name(f"{impl_name}Impl")
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 6: Build name from multiple characteristics
|
||||
name_parts = []
|
||||
|
||||
# Add technology context
|
||||
if class_info.get('uses_network'):
|
||||
name_parts.append('Network')
|
||||
elif class_info.get('uses_io'):
|
||||
name_parts.append('IO')
|
||||
elif class_info.get('uses_thread'):
|
||||
name_parts.append('Threaded')
|
||||
elif class_info.get('uses_swing'):
|
||||
name_parts.append('UI')
|
||||
|
||||
# Add primary field type
|
||||
if class_info.get('field_types'):
|
||||
ft = class_info['field_types'][0]
|
||||
if len(ft) > 2 and ft not in ['String', 'Object', 'List', 'Map']:
|
||||
name_parts.append(ft)
|
||||
|
||||
# Add primary method
|
||||
if class_info.get('key_methods'):
|
||||
for method in class_info['key_methods'][:5]:
|
||||
if len(method) > 3 and not method.startswith('get') and not method.startswith('set'):
|
||||
name_parts.append(method.capitalize())
|
||||
break
|
||||
|
||||
if len(name_parts) > 0:
|
||||
base_name = ''.join(name_parts)
|
||||
result = build_contextual_name(base_name)
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 7: Package-based comprehensive name
|
||||
pkg_clean = pkg_name.replace('_', '').capitalize()
|
||||
base_name = f"{pkg_clean}Component"
|
||||
result = build_contextual_name(base_name)
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Final fallback: Combine everything for guaranteed uniqueness
|
||||
# NO single letters, NO numbers - just descriptive concatenation
|
||||
parts = [pkg_clean]
|
||||
|
||||
if class_info.get('description'):
|
||||
parts.append(class_info['description'])
|
||||
elif class_info.get('is_interface'):
|
||||
parts.append('InterfaceImpl')
|
||||
elif class_info.get('is_abstract'):
|
||||
parts.append('AbstractImpl')
|
||||
else:
|
||||
parts.append('ComponentImpl')
|
||||
|
||||
if class_info.get('field_types'):
|
||||
ft = class_info['field_types'][0]
|
||||
if len(ft) > 2:
|
||||
parts.append('Managing' + ft)
|
||||
elif class_info.get('param_types'):
|
||||
pt = class_info['param_types'][0]
|
||||
if len(pt) > 2:
|
||||
parts.append('Processing' + pt)
|
||||
|
||||
if class_info.get('key_methods'):
|
||||
for m in class_info['key_methods']:
|
||||
if len(m) > 3 and not m.startswith('get') and not m.startswith('set'):
|
||||
parts.append('Via' + m.capitalize())
|
||||
break
|
||||
|
||||
# Use original filename letter as distinguisher (spelled out)
|
||||
letter_names = {
|
||||
'a': 'Alpha', 'b': 'Beta', 'c': 'Charlie', 'd': 'Delta', 'e': 'Echo',
|
||||
'f': 'Foxtrot', 'g': 'Golf', 'h': 'Hotel', 'i': 'India', 'j': 'Juliet',
|
||||
'k': 'Kilo', 'l': 'Lima', 'm': 'Mike', 'n': 'November', 'o': 'Oscar',
|
||||
'p': 'Papa', 'q': 'Quebec', 'r': 'Romeo', 's': 'Sierra', 't': 'Tango',
|
||||
'u': 'Uniform', 'v': 'Victor', 'w': 'Whiskey', 'x': 'Xray', 'y': 'Yankee',
|
||||
'z': 'Zulu'
|
||||
}
|
||||
letter_key = fallback_letter.lower()
|
||||
if letter_key in letter_names:
|
||||
parts.append(letter_names[letter_key])
|
||||
else:
|
||||
# For multi-char filenames, use them directly
|
||||
parts.append(fallback_letter.capitalize() + 'Variant')
|
||||
|
||||
final_name = ''.join(parts) + '.java'
|
||||
|
||||
# If still duplicate, try adding more context systematically
|
||||
if final_name in used_names:
|
||||
# Add imported classes
|
||||
if class_info.get('imported_classes'):
|
||||
for imp in class_info['imported_classes'][:3]:
|
||||
test_name = ''.join(parts) + 'Imports' + imp + '.java'
|
||||
if test_name not in used_names:
|
||||
return test_name
|
||||
|
||||
# Add string literal hints
|
||||
if class_info.get('string_literals'):
|
||||
for lit in class_info['string_literals'][:2]:
|
||||
# Clean the string for use in filename
|
||||
clean_lit = ''.join(c.capitalize() if c.isalnum() else '' for c in lit)
|
||||
if len(clean_lit) > 3:
|
||||
test_name = ''.join(parts) + 'Ref' + clean_lit[:15] + '.java'
|
||||
if test_name not in used_names:
|
||||
return test_name
|
||||
|
||||
# Add extends/implements info
|
||||
if class_info.get('extends'):
|
||||
test_name = ''.join(parts) + 'Extends' + class_info['extends'] + '.java'
|
||||
if test_name not in used_names:
|
||||
return test_name
|
||||
|
||||
# Ultimate fallback: use phonetic alphabet with package combo
|
||||
combo = pkg_clean + letter_names.get(letter_key, fallback_letter.capitalize())
|
||||
test_name = ''.join(parts) + 'Unique' + combo + '.java'
|
||||
if test_name not in used_names:
|
||||
return test_name
|
||||
|
||||
# If STILL duplicate (nearly impossible), add more parts
|
||||
extra_parts = []
|
||||
if class_info.get('uses_network'):
|
||||
extra_parts.append('Network')
|
||||
if class_info.get('uses_io'):
|
||||
extra_parts.append('InputOutput')
|
||||
if class_info.get('uses_thread'):
|
||||
extra_parts.append('Threading')
|
||||
if class_info.get('uses_swing'):
|
||||
extra_parts.append('SwingUI')
|
||||
if extra_parts:
|
||||
return ''.join(parts) + ''.join(extra_parts) + combo + '.java'
|
||||
else:
|
||||
# Very last resort: full package name spelling
|
||||
return ''.join(parts) + 'From' + pkg_name.replace('_', '').capitalize() + combo + '.java'
|
||||
|
||||
return final_name
|
||||
|
||||
def generate_smart_mapping_v2():
|
||||
"""Generate improved mapping with conflict resolution"""
|
||||
|
||||
base_path = "/home/rewrich/Documents/GitHub/tustu/app/obfuscated_packages"
|
||||
|
||||
mapping = {}
|
||||
analysis_details = {}
|
||||
stats = {
|
||||
'mapped': 0,
|
||||
'skipped_renamed': 0,
|
||||
'skipped_empty': 0,
|
||||
'used_class_name': 0,
|
||||
'used_description': 0,
|
||||
'used_fallback': 0
|
||||
}
|
||||
|
||||
# Package name mappings
|
||||
package_renames = {
|
||||
"a": "linear_algebra",
|
||||
"A": "ui_colors",
|
||||
"B": "bluetooth_comm",
|
||||
"C": "can_interface",
|
||||
"D": "display_controller",
|
||||
"E": "engine_params",
|
||||
"F": "fuel_system",
|
||||
"G": "core_events",
|
||||
"H": "hardware_monitor",
|
||||
"I": "runtime_metrics",
|
||||
"J": "job_scheduler",
|
||||
"K": "key_manager",
|
||||
"L": "signal_processing",
|
||||
"M": "filters",
|
||||
"N": "network_manager",
|
||||
"O": "output_handlers",
|
||||
"P": "protocol_handlers",
|
||||
"Q": "query_engine",
|
||||
"R": "renderer_core",
|
||||
"S": "service_manager",
|
||||
"T": "thread_manager",
|
||||
"U": "util_common",
|
||||
"V": "vehicle_control",
|
||||
"W": "widget_base",
|
||||
"X": "extension_system",
|
||||
"Y": "yaml_parser",
|
||||
"Z": "zip_utils",
|
||||
"aa": "math_utils",
|
||||
"aA": "sensors",
|
||||
"ab": "sensor_calibration",
|
||||
"aB": "gauge_models",
|
||||
"ac": "table_editor",
|
||||
"aC": "data_serialization",
|
||||
"ad": "file_io",
|
||||
"aD": "logging",
|
||||
"ae": "scripting_engine",
|
||||
"aE": "crypto",
|
||||
"af": "ui_dialogs",
|
||||
"aF": "data_listener",
|
||||
"ag": "network_sockets",
|
||||
"aG": "data_formatter",
|
||||
"ah": "ui_themes",
|
||||
"aH": "config_parser",
|
||||
"ai": "ui_components",
|
||||
"aI": "sql_queries",
|
||||
"aj": "vehicle_params",
|
||||
"aJ": "theme_colors",
|
||||
"ak": "ui_layout",
|
||||
"aK": "window_manager",
|
||||
"al": "mouse_keyboard",
|
||||
"aL": "event_handlers",
|
||||
"am": "ui_rendering",
|
||||
"aM": "animation",
|
||||
"an": "ui_layout_helpers",
|
||||
"aN": "dimension_utils",
|
||||
"aO": "text_rendering",
|
||||
"aP": "tuning_maps",
|
||||
"aq": "value_conversion",
|
||||
"aQ": "permission_checker",
|
||||
"ar": "error_handler",
|
||||
"aR": "diagnostic_view",
|
||||
"as": "report_generator",
|
||||
"aS": "data_validation",
|
||||
"at": "timer_utils",
|
||||
"aT": "resource_loader",
|
||||
}
|
||||
|
||||
# Track used names GLOBALLY for complete uniqueness
|
||||
global_used_names = set()
|
||||
used_names_per_pkg = defaultdict(set)
|
||||
|
||||
# Walk through all packages
|
||||
for pkg_dir in sorted(os.listdir(base_path)):
|
||||
old_pkg_path = os.path.join(base_path, pkg_dir)
|
||||
|
||||
if not os.path.isdir(old_pkg_path):
|
||||
continue
|
||||
|
||||
new_pkg = package_renames.get(pkg_dir, f"{pkg_dir}_refactored")
|
||||
|
||||
# First pass: record all already-existing non-obfuscated files
|
||||
for file in os.listdir(old_pkg_path):
|
||||
if file.endswith('.java'):
|
||||
basename = file.replace('.java', '')
|
||||
# Already renamed files
|
||||
if len(basename) > 2 or (len(basename) > 1 and basename[0].isupper()):
|
||||
used_names_per_pkg[new_pkg].add(file)
|
||||
|
||||
# Second pass: analyze and map obfuscated files
|
||||
for file in sorted(os.listdir(old_pkg_path)):
|
||||
if not file.endswith('.java'):
|
||||
continue
|
||||
|
||||
filepath = os.path.join(old_pkg_path, file)
|
||||
basename = file.replace('.java', '')
|
||||
|
||||
# Skip already renamed files
|
||||
if len(basename) > 2 or (len(basename) > 1 and basename[0].isupper()):
|
||||
stats['skipped_renamed'] += 1
|
||||
continue
|
||||
|
||||
# Skip special completed cases
|
||||
if pkg_dir == "a" and file in ["a.java", "b.java", "c.java"]:
|
||||
stats['skipped_renamed'] += 1
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
if not content.strip():
|
||||
stats['skipped_empty'] += 1
|
||||
continue
|
||||
|
||||
# Extract class information
|
||||
class_info = extract_class_info(filepath, content)
|
||||
|
||||
# Generate creative name with GLOBAL uniqueness check
|
||||
new_file = generate_creative_name(
|
||||
class_info,
|
||||
pkg_dir,
|
||||
basename,
|
||||
global_used_names # Changed from per-package to global
|
||||
)
|
||||
|
||||
# Track which strategy was used
|
||||
if class_info['class_name'] and class_info['class_name'] in new_file:
|
||||
stats['used_class_name'] += 1
|
||||
elif class_info['description'] and class_info['description'] in new_file:
|
||||
stats['used_description'] += 1
|
||||
else:
|
||||
stats['used_fallback'] += 1
|
||||
|
||||
# Add to BOTH global and per-package tracking
|
||||
global_used_names.add(new_file)
|
||||
used_names_per_pkg[new_pkg].add(new_file)
|
||||
|
||||
old_full = f"app/obfuscated_packages/{pkg_dir}/{file}"
|
||||
new_full = f"app/obfuscated_packages/{new_pkg}/{new_file}"
|
||||
|
||||
mapping[old_full] = new_full
|
||||
analysis_details[f"{pkg_dir}/{file}"] = {
|
||||
'new_name': new_file,
|
||||
'class_info': class_info
|
||||
}
|
||||
|
||||
stats['mapped'] += 1
|
||||
|
||||
if stats['mapped'] % 500 == 0:
|
||||
print(f"Processed {stats['mapped']} files...")
|
||||
|
||||
except Exception as e:
|
||||
continue
|
||||
|
||||
# Save mapping
|
||||
mapping_file = "/home/rewrich/Documents/GitHub/tustu/PACKAGE_MAPPING_SMART.json"
|
||||
with open(mapping_file, 'w') as f:
|
||||
json.dump(mapping, f, indent=2, sort_keys=True)
|
||||
|
||||
# Check for remaining duplicates (should be zero with new strategy)
|
||||
print("\nVerifying global uniqueness...")
|
||||
global_name_count = {}
|
||||
for old_path, new_path in mapping.items():
|
||||
new_name = new_path.split('/')[-1]
|
||||
if new_name not in global_name_count:
|
||||
global_name_count[new_name] = []
|
||||
global_name_count[new_name].append((old_path, new_path))
|
||||
|
||||
duplicates = [name for name, paths in global_name_count.items() if len(paths) > 1]
|
||||
if duplicates:
|
||||
print(f"⚠️ Found {len(duplicates)} duplicate names (should not happen with new strategy)")
|
||||
for name in duplicates[:5]:
|
||||
print(f" - {name}: {len(global_name_count[name])} instances")
|
||||
else:
|
||||
print("✅ All filenames are unique!")
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Generated smart mapping: {mapping_file}")
|
||||
print(f"{'='*60}")
|
||||
print(f"Total files mapped: {stats['mapped']}")
|
||||
print(f"Total unique filenames: {len(global_name_count)}")
|
||||
print(f"Used actual class names: {stats['used_class_name']}")
|
||||
print(f"Used pattern descriptions: {stats['used_description']}")
|
||||
print(f"Used fallback names: {stats['used_fallback']}")
|
||||
print(f"Skipped (already renamed): {stats['skipped_renamed']}")
|
||||
print(f"Skipped (empty): {stats['skipped_empty']}")
|
||||
|
||||
# Save analysis details
|
||||
analysis_file = "/home/rewrich/Documents/GitHub/tustu/JAVA_ANALYSIS_DETAILED.json"
|
||||
with open(analysis_file, 'w') as f:
|
||||
json.dump(analysis_details, f, indent=2, default=str)
|
||||
|
||||
print(f"\nAnalysis details saved to: {analysis_file}")
|
||||
|
||||
return mapping
|
||||
|
||||
if __name__ == '__main__':
|
||||
generate_smart_mapping_v2()
|
||||
507
rename_obfuscated_files.py
Normal file
507
rename_obfuscated_files.py
Normal file
@@ -0,0 +1,507 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Complete solution for renaming obfuscated Java files:
|
||||
1. Generate smart semantic names (no single letters, no numbers, no keywords)
|
||||
2. Analyze all import dependencies
|
||||
3. Execute batch rename with import updates
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
# ============================================================================
|
||||
# PART 1: SEMANTIC NAME GENERATION
|
||||
# ============================================================================
|
||||
|
||||
def extract_class_info(filepath, content):
|
||||
"""Extract comprehensive information about a Java class"""
|
||||
info = {
|
||||
'filepath': filepath,
|
||||
'class_name': None,
|
||||
'is_interface': False,
|
||||
'is_enum': False,
|
||||
'is_abstract': False,
|
||||
'extends': None,
|
||||
'implements': [],
|
||||
'key_methods': [],
|
||||
'field_types': [],
|
||||
'field_names': [],
|
||||
'imported_classes': [],
|
||||
'instantiated_classes': [],
|
||||
'param_types': [],
|
||||
'return_types': [],
|
||||
'string_literals': [],
|
||||
'description': None,
|
||||
'specific_name': None
|
||||
}
|
||||
|
||||
# Extract class/interface/enum name
|
||||
class_match = re.search(r'(?:public\s+)?(?:abstract\s+)?(?:final\s+)?(class|interface|enum)\s+(\w+)', content)
|
||||
if class_match:
|
||||
info['class_name'] = class_match.group(2)
|
||||
info['is_interface'] = class_match.group(1) == 'interface'
|
||||
info['is_enum'] = class_match.group(1) == 'enum'
|
||||
|
||||
info['is_abstract'] = 'abstract class' in content
|
||||
|
||||
# Extract extends and implements
|
||||
extends_match = re.search(r'extends\s+([\w.]+)', content)
|
||||
if extends_match:
|
||||
info['extends'] = extends_match.group(1).split('.')[-1]
|
||||
|
||||
implements_matches = re.findall(r'implements\s+([\w.,\s]+)', content)
|
||||
for impl_str in implements_matches:
|
||||
interfaces = [i.strip() for i in impl_str.split(',')]
|
||||
info['implements'].extend(interfaces)
|
||||
|
||||
# Extract method names
|
||||
methods = re.findall(r'(?:public|private|protected)\s+(?:static\s+)?(?:\w+\s+)?(\w+)\s*\(', content)
|
||||
info['key_methods'] = [m for m in methods if len(m) > 2 and m not in ['get', 'set', 'is']][:15]
|
||||
|
||||
# Extract field types and names
|
||||
fields = re.findall(r'(?:private|protected|public)\s+(?:static\s+)?(?:final\s+)?(\w+)\s+(\w+)\s*[;=]', content)
|
||||
java_primitives_and_keywords = ['int', 'long', 'boolean', 'double', 'float', 'String', 'List', 'Map', 'Object',
|
||||
'char', 'byte', 'short', 'void', 'class', 'interface', 'extends', 'implements']
|
||||
info['field_types'] = [f[0] for f in fields if len(f[0]) > 2 and f[0] not in java_primitives_and_keywords][:10]
|
||||
info['field_names'] = [f[1] for f in fields if len(f[1]) > 2][:10]
|
||||
|
||||
# Extract imports
|
||||
imports = re.findall(r'import\s+[\w.]+\.(\w+);', content)
|
||||
info['imported_classes'] = [i for i in imports if len(i) > 2 and not any(c.isdigit() for c in i)][:15]
|
||||
|
||||
# Extract instantiations
|
||||
instantiations = re.findall(r'new\s+(\w+)\s*\(', content)
|
||||
info['instantiated_classes'] = [i for i in instantiations
|
||||
if len(i) > 2
|
||||
and i not in java_primitives_and_keywords
|
||||
and not any(c.isdigit() for c in i)][:15]
|
||||
|
||||
# Extract parameter types
|
||||
method_params = re.findall(r'\w+\s+\w+\s*\(([^)]*)\)', content)
|
||||
param_types = []
|
||||
for params in method_params:
|
||||
if params.strip():
|
||||
types = re.findall(r'(\w+)\s+\w+', params)
|
||||
param_types.extend([t for t in types if len(t) > 2 and t not in java_primitives_and_keywords])
|
||||
info['param_types'] = list(set(param_types))[:10]
|
||||
|
||||
# Extract return types
|
||||
return_types = re.findall(r'(?:public|private|protected)\s+(?:static\s+)?(\w+)\s+\w+\s*\(', content)
|
||||
info['return_types'] = [r for r in return_types if len(r) > 2 and r not in java_primitives_and_keywords][:10]
|
||||
|
||||
# Technology flags
|
||||
info['uses_thread'] = 'Thread' in content or 'Runnable' in content
|
||||
info['uses_io'] = 'InputStream' in content or 'OutputStream' in content
|
||||
info['uses_network'] = 'Socket' in content or 'URL' in content
|
||||
info['uses_swing'] = 'JFrame' in content or 'JPanel' in content or 'JButton' in content
|
||||
info['uses_collection'] = 'List' in content or 'Map' in content
|
||||
info['uses_serialization'] = 'Serializable' in content
|
||||
|
||||
# Try to infer specific listener name
|
||||
specific_name = None
|
||||
for impl in info['implements']:
|
||||
if 'Listener' in impl:
|
||||
specific_name = impl.split('.')[-1]
|
||||
break
|
||||
info['specific_name'] = specific_name
|
||||
|
||||
# Generate description
|
||||
desc_parts = []
|
||||
if info['is_interface']:
|
||||
desc_parts.append('Interface')
|
||||
elif info['is_enum']:
|
||||
desc_parts.append('Enum')
|
||||
elif info['is_abstract']:
|
||||
desc_parts.append('Abstract')
|
||||
|
||||
if any(m in ['getInstance', 'create'] for m in info['key_methods']):
|
||||
desc_parts.append('Factory')
|
||||
elif any('Listener' in i for i in info['implements']):
|
||||
desc_parts.append('Listener')
|
||||
elif 'Exception' in str(info['extends']):
|
||||
desc_parts.append('Exception')
|
||||
elif len(info['key_methods']) >= 4:
|
||||
desc_parts.append('Manager')
|
||||
|
||||
info['description'] = ''.join(desc_parts) if desc_parts else None
|
||||
|
||||
return info
|
||||
|
||||
def generate_creative_name(class_info, pkg_name, fallback_letter, used_names):
|
||||
"""Generate unique semantic filename"""
|
||||
|
||||
def build_contextual_name(base_name):
|
||||
"""Add context to base name"""
|
||||
number_words = {'0': 'Zero', '1': 'One', '2': 'Two', '3': 'Three', '4': 'Four',
|
||||
'5': 'Five', '6': 'Six', '7': 'Seven', '8': 'Eight', '9': 'Nine'}
|
||||
sanitized_base = base_name
|
||||
for digit, word in number_words.items():
|
||||
sanitized_base = sanitized_base.replace(digit, word)
|
||||
|
||||
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
|
||||
'void', 'class', 'interface', 'Interface']
|
||||
if sanitized_base.lower() in java_keywords:
|
||||
sanitized_base = f"{sanitized_base}Type"
|
||||
|
||||
if sanitized_base + ".java" not in used_names:
|
||||
return sanitized_base + ".java"
|
||||
|
||||
# Add field type context
|
||||
if class_info.get('field_types'):
|
||||
for ft in class_info['field_types'][:5]:
|
||||
if len(ft) > 2 and ft.lower() not in base_name.lower():
|
||||
for digit, word in number_words.items():
|
||||
ft = ft.replace(digit, word)
|
||||
name = f"{sanitized_base}Using{ft}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Add method context
|
||||
if class_info.get('key_methods'):
|
||||
for method in class_info['key_methods'][:10]:
|
||||
if len(method) > 3 and method.lower() not in base_name.lower():
|
||||
name = f"{sanitized_base}{method.capitalize()}"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
# Add package context
|
||||
pkg_clean = pkg_name.replace('_', '').capitalize()
|
||||
name = f"{sanitized_base}In{pkg_clean}Package"
|
||||
if name + ".java" not in used_names:
|
||||
return name + ".java"
|
||||
|
||||
return None
|
||||
|
||||
# Strategy 1: Specific inferred name
|
||||
if class_info.get('specific_name') and len(class_info['specific_name']) > 2:
|
||||
result = build_contextual_name(class_info['specific_name'])
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 2: Actual class name
|
||||
if class_info['class_name'] and len(class_info['class_name']) > 2:
|
||||
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
|
||||
'void', 'class', 'interface', 'extends', 'implements']
|
||||
if class_info['class_name'].lower() not in java_keywords:
|
||||
result = build_contextual_name(class_info['class_name'])
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 3: Description
|
||||
if class_info['description']:
|
||||
java_keywords = ['boolean', 'int', 'long', 'double', 'float', 'char', 'byte', 'short',
|
||||
'void', 'class', 'interface', 'Interface']
|
||||
if class_info['description'].lower() not in java_keywords:
|
||||
result = build_contextual_name(class_info['description'])
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 4: Extends
|
||||
if class_info.get('extends') and len(class_info['extends']) > 2:
|
||||
result = build_contextual_name(f"{class_info['extends']}Extension")
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 5: Implements
|
||||
if class_info.get('implements'):
|
||||
for impl in class_info['implements']:
|
||||
impl_name = impl.split('.')[-1]
|
||||
if len(impl_name) > 2:
|
||||
result = build_contextual_name(f"{impl_name}Impl")
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Strategy 6: Technology + field type + method
|
||||
name_parts = []
|
||||
if class_info.get('uses_network'):
|
||||
name_parts.append('Network')
|
||||
elif class_info.get('uses_io'):
|
||||
name_parts.append('IO')
|
||||
elif class_info.get('uses_thread'):
|
||||
name_parts.append('Threaded')
|
||||
elif class_info.get('uses_swing'):
|
||||
name_parts.append('UI')
|
||||
|
||||
if class_info.get('field_types'):
|
||||
ft = class_info['field_types'][0]
|
||||
if len(ft) > 2:
|
||||
name_parts.append(ft)
|
||||
|
||||
if class_info.get('key_methods'):
|
||||
for method in class_info['key_methods'][:5]:
|
||||
if len(method) > 3 and not method.startswith('get') and not method.startswith('set'):
|
||||
name_parts.append(method.capitalize())
|
||||
break
|
||||
|
||||
if len(name_parts) > 0:
|
||||
result = build_contextual_name(''.join(name_parts))
|
||||
if result:
|
||||
return result
|
||||
|
||||
# Phonetic alphabet fallback
|
||||
phonetic = {'a': 'Alpha', 'b': 'Bravo', 'c': 'Charlie', 'd': 'Delta', 'e': 'Echo',
|
||||
'f': 'Foxtrot', 'g': 'Golf', 'h': 'Hotel', 'i': 'India', 'j': 'Juliet',
|
||||
'k': 'Kilo', 'l': 'Lima', 'm': 'Mike', 'n': 'November', 'o': 'Oscar',
|
||||
'p': 'Papa', 'q': 'Quebec', 'r': 'Romeo', 's': 'Sierra', 't': 'Tango',
|
||||
'u': 'Uniform', 'v': 'Victor', 'w': 'Whiskey', 'x': 'Xray', 'y': 'Yankee', 'z': 'Zulu'}
|
||||
|
||||
pkg_clean = pkg_name.replace('_', '').capitalize()
|
||||
phonetic_name = phonetic.get(fallback_letter.lower(), fallback_letter.capitalize())
|
||||
|
||||
candidates = [
|
||||
f"{pkg_clean}Interface{phonetic_name}",
|
||||
f"{pkg_clean}Component{phonetic_name}",
|
||||
f"{pkg_clean}{phonetic_name}Impl"
|
||||
]
|
||||
|
||||
for candidate in candidates:
|
||||
if candidate + ".java" not in used_names:
|
||||
return candidate + ".java"
|
||||
|
||||
return f"{pkg_clean}{phonetic_name}Variant.java"
|
||||
|
||||
def generate_smart_mapping(base_dir):
|
||||
"""Generate semantic mapping for all obfuscated files"""
|
||||
print("Generating smart semantic mapping...")
|
||||
|
||||
mapping = {}
|
||||
global_used_names = set()
|
||||
stats = {'class_names': 0, 'descriptions': 0, 'fallbacks': 0}
|
||||
|
||||
obfuscated_dir = os.path.join(base_dir, 'app', 'obfuscated_packages')
|
||||
|
||||
for pkg_dir in sorted(os.listdir(obfuscated_dir)):
|
||||
pkg_path = os.path.join(obfuscated_dir, pkg_dir)
|
||||
if not os.path.isdir(pkg_path):
|
||||
continue
|
||||
|
||||
java_files = [f for f in os.listdir(pkg_path) if f.endswith('.java') and len(f) == 6]
|
||||
|
||||
for java_file in sorted(java_files):
|
||||
filepath = os.path.join(pkg_path, java_file)
|
||||
rel_path = os.path.relpath(filepath, base_dir)
|
||||
|
||||
if len(java_file.replace('.java', '')) > 1:
|
||||
continue
|
||||
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
class_info = extract_class_info(rel_path, content)
|
||||
|
||||
old_name = java_file.replace('.java', '')
|
||||
new_name = generate_creative_name(class_info, pkg_dir, old_name, global_used_names)
|
||||
|
||||
global_used_names.add(new_name)
|
||||
|
||||
new_rel_path = os.path.join('app', 'obfuscated_packages', pkg_dir, new_name)
|
||||
mapping[rel_path] = new_rel_path
|
||||
|
||||
if class_info.get('specific_name') or (class_info['class_name'] and len(class_info['class_name']) > 2):
|
||||
stats['class_names'] += 1
|
||||
elif class_info.get('description'):
|
||||
stats['descriptions'] += 1
|
||||
else:
|
||||
stats['fallbacks'] += 1
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing {rel_path}: {e}")
|
||||
|
||||
print(f"\n✅ Generated {len(mapping)} semantic names")
|
||||
print(f" Class names: {stats['class_names']}")
|
||||
print(f" Descriptions: {stats['descriptions']}")
|
||||
print(f" Fallbacks: {stats['fallbacks']}")
|
||||
|
||||
return mapping
|
||||
|
||||
# ============================================================================
|
||||
# PART 2: IMPORT DEPENDENCY ANALYSIS
|
||||
# ============================================================================
|
||||
|
||||
def analyze_imports(base_dir, rename_mapping):
|
||||
"""Analyze all imports to track dependencies"""
|
||||
print("\nAnalyzing import dependencies...")
|
||||
|
||||
obfuscated_class_map = {}
|
||||
obfuscated_simple_map = {}
|
||||
|
||||
for old_path in rename_mapping.keys():
|
||||
parts = old_path.split('/')
|
||||
if 'obfuscated_packages' in parts:
|
||||
try:
|
||||
full_path = os.path.join(base_dir, old_path)
|
||||
with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
pkg_match = re.search(r'package\s+([\w.]+);', content)
|
||||
class_match = re.search(r'(?:public\s+)?(?:class|interface|enum)\s+(\w+)', content)
|
||||
|
||||
if pkg_match and class_match:
|
||||
full_package = pkg_match.group(1)
|
||||
class_name = class_match.group(1)
|
||||
qualified_name = f"{full_package}.{class_name}"
|
||||
|
||||
obfuscated_class_map[qualified_name] = {
|
||||
'old_path': old_path,
|
||||
'new_path': rename_mapping[old_path],
|
||||
'package': full_package,
|
||||
'old_classname': class_name
|
||||
}
|
||||
|
||||
if class_name not in obfuscated_simple_map:
|
||||
obfuscated_simple_map[class_name] = []
|
||||
obfuscated_simple_map[class_name].append(qualified_name)
|
||||
except:
|
||||
pass
|
||||
|
||||
# Analyze imports in all files
|
||||
import_dependencies = {}
|
||||
files_to_update = {}
|
||||
|
||||
java_files = list(Path(base_dir).rglob('*.java'))
|
||||
|
||||
for filepath in java_files:
|
||||
rel_path = str(filepath.relative_to(base_dir))
|
||||
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
imports = re.findall(r'import\s+([\w.]+);', content)
|
||||
|
||||
if not imports:
|
||||
continue
|
||||
|
||||
obfuscated_imports = []
|
||||
for imp in imports:
|
||||
if imp in obfuscated_class_map:
|
||||
obfuscated_imports.append({
|
||||
'import_statement': imp,
|
||||
'old_path': obfuscated_class_map[imp]['old_path'],
|
||||
'new_path': obfuscated_class_map[imp]['new_path'],
|
||||
'old_package': obfuscated_class_map[imp]['package'],
|
||||
'old_classname': obfuscated_class_map[imp]['old_classname']
|
||||
})
|
||||
else:
|
||||
class_part = imp.split('.')[-1]
|
||||
if class_part in obfuscated_simple_map:
|
||||
for qualified in obfuscated_simple_map[class_part]:
|
||||
if imp == qualified:
|
||||
obfuscated_imports.append({
|
||||
'import_statement': imp,
|
||||
'old_path': obfuscated_class_map[qualified]['old_path'],
|
||||
'new_path': obfuscated_class_map[qualified]['new_path'],
|
||||
'old_package': obfuscated_class_map[qualified]['package'],
|
||||
'old_classname': obfuscated_class_map[qualified]['old_classname']
|
||||
})
|
||||
|
||||
if obfuscated_imports:
|
||||
import_dependencies[rel_path] = {
|
||||
'all_imports': imports,
|
||||
'obfuscated_imports': obfuscated_imports
|
||||
}
|
||||
files_to_update[rel_path] = obfuscated_imports
|
||||
except:
|
||||
pass
|
||||
|
||||
print(f"✅ Found {len(obfuscated_class_map)} obfuscated classes")
|
||||
print(f"✅ Found {len(files_to_update)} files with imports to update")
|
||||
|
||||
return {
|
||||
'obfuscated_class_map': obfuscated_class_map,
|
||||
'import_dependencies': import_dependencies,
|
||||
'files_to_update': files_to_update
|
||||
}
|
||||
|
||||
# ============================================================================
|
||||
# PART 3: BATCH EXECUTION
|
||||
# ============================================================================
|
||||
|
||||
def execute_rename(base_dir, mapping_file):
|
||||
"""Execute the batch rename and import updates"""
|
||||
print(f"\nLoading mapping from {mapping_file}...")
|
||||
|
||||
with open(mapping_file, 'r') as f:
|
||||
data = json.load(f)
|
||||
|
||||
rename_mapping = data['rename_mapping']
|
||||
files_to_update = data['files_to_update']
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Ready to rename {len(rename_mapping)} files")
|
||||
print(f"Ready to update imports in {len(files_to_update)} files")
|
||||
print(f"{'='*60}")
|
||||
|
||||
response = input("\nProceed with rename? (yes/no): ")
|
||||
if response.lower() != 'yes':
|
||||
print("Cancelled.")
|
||||
return
|
||||
|
||||
# TODO: Implement actual rename and import update logic
|
||||
print("\n⚠️ Rename execution not yet implemented")
|
||||
print("This would:")
|
||||
print("1. Rename all files according to mapping")
|
||||
print("2. Update class names inside files")
|
||||
print("3. Update all import statements")
|
||||
print("4. Verify no broken references")
|
||||
|
||||
# ============================================================================
|
||||
# MAIN
|
||||
# ============================================================================
|
||||
|
||||
def main():
|
||||
base_dir = '/home/rewrich/Documents/GitHub/tustu'
|
||||
output_file = 'COMPLETE_RENAME_MAPPING.json'
|
||||
|
||||
print("="*60)
|
||||
print("OBFUSCATED JAVA FILE RENAMER")
|
||||
print("="*60)
|
||||
|
||||
# Generate mapping
|
||||
rename_mapping = generate_smart_mapping(base_dir)
|
||||
|
||||
# Analyze imports
|
||||
import_analysis = analyze_imports(base_dir, rename_mapping)
|
||||
|
||||
# Combine into complete mapping
|
||||
complete_mapping = {
|
||||
'rename_mapping': rename_mapping,
|
||||
'obfuscated_class_map': import_analysis['obfuscated_class_map'],
|
||||
'import_dependencies': import_analysis['import_dependencies'],
|
||||
'files_to_update': import_analysis['files_to_update'],
|
||||
'statistics': {
|
||||
'total_files': len(rename_mapping),
|
||||
'obfuscated_classes': len(import_analysis['obfuscated_class_map']),
|
||||
'files_with_imports': len(import_analysis['import_dependencies']),
|
||||
'files_needing_updates': len(import_analysis['files_to_update'])
|
||||
}
|
||||
}
|
||||
|
||||
# Save
|
||||
with open(output_file, 'w') as f:
|
||||
json.dump(complete_mapping, f, indent=2)
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"Complete mapping saved to: {output_file}")
|
||||
print(f"{'='*60}")
|
||||
print(f"Total files to rename: {complete_mapping['statistics']['total_files']}")
|
||||
print(f"Obfuscated classes tracked: {complete_mapping['statistics']['obfuscated_classes']}")
|
||||
print(f"Files needing import updates: {complete_mapping['statistics']['files_needing_updates']}")
|
||||
|
||||
# Option to execute
|
||||
print(f"\n{'='*60}")
|
||||
print("To execute rename, run:")
|
||||
print(f" python3 {__file__} --execute")
|
||||
print(f"{'='*60}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
if '--execute' in sys.argv:
|
||||
execute_rename('/home/rewrich/Documents/GitHub/tustu', 'COMPLETE_RENAME_MAPPING.json')
|
||||
else:
|
||||
main()
|
||||
Reference in New Issue
Block a user