Bridging Language Barriers: A Comparative Review and Empirical Evaluation of Source-to-Source Transpilers


Featured Transpiler Tools

TypeScript Compiler

Version: 5.7.3

View on GitHub

Babel

Version: 7.26.4

View on GitHub

Nim

Version: 2.2.0

View on GitHub

ClojureScript

Version: 1.10.773

View on GitHub

Java2Python

Version: 0.5.1

View on GitHub

jscodeshift

Version: 17.1.1

View on GitHub

c2rust

Version: 0.18.0

View on GitHub

Fennel

Version: 1.5.1

View on GitHub

TypeScriptToLua

Version: 1.29.0

View on GitHub

Scripts used to evaluate each transpiled output

Script used for Babel and the TypeScript Compiler

Using this script requires two files: precision-analyzer.js and run-precision-analysis.js. The actual benchmark lives in precision-analyzer.js; run-precision-analysis.js then calls it and displays the scores for both transpiled outputs.
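
Both files share the weighting scheme defined in the analyzer's constructor: functional correctness 40%, semantic equivalence 25%, code quality 15%, syntax preservation 10%, and performance preservation 10%. As a worked example, category scores of 100, 80, 60, 50, and 50 combine into an overall score of 100*0.40 + 80*0.25 + 60*0.15 + 50*0.10 + 50*0.10 = 79.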


// precision-analyzer.js
const { parse } = require('@babel/parser');
const { default: traverse } = require('@babel/traverse');
const fs = require('fs');
const { execSync } = require('child_process');
const path = require('path');

class EnhancedTranspilationAnalyzer {
    constructor(originalCode, transpiledCode, sourceLanguage = 'auto') {
        this.originalCode = originalCode;
        this.transpiledCode = transpiledCode;
        this.sourceLanguage = sourceLanguage;
        this.weights = {
            functionalCorrectness: 0.40,     // Most important - does it work?
            semanticEquivalence: 0.25,       // Logic and behavior preservation
            codeQuality: 0.15,               // Best practices and readability
            syntaxPreservation: 0.10,        // Basic syntax matching
            performancePreservation: 0.10    // Efficiency considerations
        };
    }

    async analyze() {
        try {
            const results = {
                functionalCorrectness: await this.testFunctionalCorrectness(),
                semanticEquivalence: this.analyzeSemanticEquivalence(),
                codeQuality: this.analyzeCodeQuality(),
                syntaxPreservation: this.analyzeSyntaxPreservation(),
                performancePreservation: this.analyzePerformancePreservation()
            };

            const overallScore = this.calculateWeightedScore(results);
            
            return {
                ...results,
                overallScore,
                recommendations: this.generateRecommendations(results)
            };
        } catch (error) {
            console.error('Analysis failed:', error);
            return { overallScore: 0, error: error.message };
        }
    }

    async testFunctionalCorrectness() {
        const testCases = [
            { input: [1, 2, 3, 4, 5], expected: { sumEven: 6, maxNumber: 5 } },
            { input: [10, 20, 30], expected: { sumEven: 60, maxNumber: 30 } },
            { input: [1, 3, 5], expected: { sumEven: 0, maxNumber: 5 } },
            { input: [2, 4, 6], expected: { sumEven: 12, maxNumber: 6 } },
            { input: [0], expected: { sumEven: 0, maxNumber: 0 } },
            { input: [-1, -2, -3], expected: { sumEven: -2, maxNumber: -1 } },
            { input: [100, -50, 25, -30], expected: { sumEven: 20, maxNumber: 100 } } // even values: 100, -50, -30
        ];

        try {
            const testResults = await this.executeTests(testCases);
            const passedTests = testResults.filter(result => result.passed).length;
            return (passedTests / testCases.length) * 100;
        } catch (error) {
            console.error('Functional testing failed:', error);
            return 0;
        }
    }
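
    // Example: 6 of 7 test cases passing scores (6 / 7) * 100 ≈ 85.7.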

    async executeTests(testCases) {
        // Create a temporary test file
        const testCode = `
${this.transpiledCode}

const testCases = ${JSON.stringify(testCases)};
const results = [];

for (const testCase of testCases) {
    try {
        const result = processNumbers(testCase.input);
        const passed = result.sumEven === testCase.expected.sumEven && 
                      result.maxNumber === testCase.expected.maxNumber;
        results.push({ passed, result, expected: testCase.expected });
    } catch (error) {
        results.push({ passed: false, error: error.message });
    }
}

console.log(JSON.stringify(results));
`;

        // Write to temporary file and execute
        const tempFile = path.join(__dirname, 'temp_test.js');
        fs.writeFileSync(tempFile, testCode);
        
        try {
            const output = execSync(`node "${tempFile}"`, { encoding: 'utf8', timeout: 5000 });
            fs.unlinkSync(tempFile);
            return JSON.parse(output);
        } catch (error) {
            if (fs.existsSync(tempFile)) fs.unlinkSync(tempFile);
            throw error;
        }
    }

    analyzeSemanticEquivalence() {
        let score = 0;

        // 1. Algorithm Logic Preservation (40 points)
        const algorithmPatterns = [
            { pattern: /num\s*%\s*2\s*===?\s*0/, description: 'Even number check' },
            { pattern: /sumEven\s*\+?=\s*num/, description: 'Sum accumulation' },
            { pattern: /num\s*>\s*maxNumber/, description: 'Maximum comparison' },
            { pattern: /maxNumber\s*=\s*num/, description: 'Maximum update' }
        ];

        algorithmPatterns.forEach(({ pattern, description }) => {
            if (pattern.test(this.originalCode) && pattern.test(this.transpiledCode)) {
                score += 10;
            }
        });

        // 2. Control Flow Preservation (30 points)
        const controlFlowScore = this.compareControlFlow();
        score += controlFlowScore * 0.3;

        // 3. Variable Usage Patterns (30 points)
        const variableScore = this.compareVariableUsage();
        score += variableScore * 0.3;

        return Math.min(score, 100);
    }

    compareControlFlow() {
        // Parse both codes to analyze control structures
        try {
            const originalAST = this.parseCode(this.originalCode);
            const transpiledAST = this.parseCode(this.transpiledCode);

            const originalFlow = this.extractControlFlow(originalAST);
            const transpiledFlow = this.extractControlFlow(transpiledAST);

            return this.compareFlowStructures(originalFlow, transpiledFlow);
        } catch (error) {
            console.warn('Control flow analysis failed:', error.message);
            return 50; // Default moderate score if parsing fails
        }
    }

    extractControlFlow(ast) {
        const structures = [];
        
        traverse(ast, {
            ForStatement: (path) => { structures.push('for-traditional'); },
            ForOfStatement: (path) => { structures.push('for-of'); },
            ForInStatement: (path) => { structures.push('for-in'); },
            WhileStatement: (path) => { structures.push('while'); },
            IfStatement: (path) => { structures.push('if'); },
            ConditionalExpression: (path) => { structures.push('ternary'); },
            SwitchStatement: (path) => { structures.push('switch'); }
        });

        return structures;
    }

    compareFlowStructures(original, transpiled) {
        // Both for-traditional and for-of should be considered equivalent for iteration
        const normalizeStructures = (structures) => {
            return structures.map(s => 
                s === 'for-of' || s === 'for-traditional' ? 'loop' : s
            );
        };

        const normalizedOriginal = normalizeStructures(original);
        const normalizedTranspiled = normalizeStructures(transpiled);

        if (normalizedOriginal.length === 0) {
            return normalizedTranspiled.length === 0 ? 100 : 0;
        }

        const matches = normalizedOriginal.filter(struct => 
            normalizedTranspiled.includes(struct)
        ).length;

        return (matches / normalizedOriginal.length) * 100;
    }

    compareVariableUsage() {
        // Extract variable declarations and usage patterns
        const originalVars = this.extractVariablePatterns(this.originalCode);
        const transpiledVars = this.extractVariablePatterns(this.transpiledCode);

        let score = 0;

        // Check key variables preservation
        const keyVars = ['sumEven', 'maxNumber', 'num'];
        keyVars.forEach(varName => {
            if (originalVars.includes(varName) && transpiledVars.includes(varName)) {
                score += 100 / 3; // the three key variables split the 100 points equally
            }
        });

        return Math.min(score, 100);
    }

    extractVariablePatterns(code) {
        const variables = [];
        const varPattern = /(?:let\s+|const\s+|var\s+)(\w+)/g;
        let match;
        
        while ((match = varPattern.exec(code)) !== null) {
            variables.push(match[1]);
        }

        // Also extract variables from assignments
        const assignPattern = /(\w+)\s*=\s*[^=]/g;
        while ((match = assignPattern.exec(code)) !== null) {
            if (!['if', 'for', 'while', 'function'].includes(match[1])) {
                variables.push(match[1]);
            }
        }

        return [...new Set(variables)];
    }

    analyzeCodeQuality() {
        let score = 0;

        // 1. Modern JavaScript practices (25 points)
        if (this.usesModernSyntax()) score += 25;

        // 2. Proper export/import handling (25 points)
        if (this.hasProperModuleExports()) score += 25;

        // 3. Consistent code style (25 points)
        if (this.hasConsistentStyle()) score += 25;

        // 4. No obvious anti-patterns (25 points)
        if (!this.hasAntiPatterns()) score += 25;

        return score;
    }

    usesModernSyntax() {
        // Check for modern JavaScript features in appropriate contexts
        const modernFeatures = [
            /const\s+\w+\s*=/, // const declarations
            /let\s+\w+\s*=/, // let declarations
            /for\s*\(\s*const\s+\w+\s+of/, // for...of loops
            /=>\s*{/, // arrow functions
            /{\s*\w+\s*}/ // object destructuring or shorthand
        ];

        return modernFeatures.some(pattern => pattern.test(this.transpiledCode));
    }

    hasProperModuleExports() {
        // Check if exports are handled correctly
        if (this.originalCode.includes('export')) {
            return this.transpiledCode.includes('exports.') || 
                   this.transpiledCode.includes('module.exports') ||
                   this.transpiledCode.includes('export');
        }
        return true; // No exports needed
    }

    hasConsistentStyle() {
        // Basic style consistency checks
        const lines = this.transpiledCode.split('\n');
        const nonEmptyLines = lines.filter(line => line.trim().length > 0);
        
        // Check indentation consistency (basic check)
        const indentationPattern = /^(\s*)/;
        const indentations = nonEmptyLines.map(line => {
            const match = line.match(indentationPattern);
            return match ? match[1].length : 0;
        });

        // Simple consistency check - are indentations reasonable?
        const hasReasonableIndentation = indentations.every(indent => 
            indent % 2 === 0 || indent % 4 === 0
        );

        return hasReasonableIndentation;
    }

    hasAntiPatterns() {
        const antiPatterns = [
            /eval\s*\(/, // eval usage
            /with\s*\(/, // with statements
            /var\s+.*;\s*var\s+.*/, // excessive var declarations
            /function\s*\(\s*\)\s*{\s*}/ // empty functions
        ];

        return antiPatterns.some(pattern => pattern.test(this.transpiledCode));
    }

    analyzeSyntaxPreservation() {
        // Simplified version of original syntax analysis
        try {
            const originalAST = this.parseCode(this.originalCode);
            const transpiledAST = this.parseCode(this.transpiledCode);

            const originalNodes = this.collectASTNodes(originalAST);
            const transpiledNodes = this.collectASTNodes(transpiledAST);

            return this.calculateNodeSimilarity(originalNodes, transpiledNodes);
        } catch (error) {
            console.warn('Syntax preservation analysis failed:', error.message);
            return 50;
        }
    }

    analyzePerformancePreservation() {
        let score = 100; // Start with perfect score

        // Check for performance regressions
        const performanceChecks = [
            {
                check: () => this.hasUnnecessaryComplexity(),
                penalty: 30,
                description: 'Unnecessary complexity added'
            },
            {
                check: () => this.hasInefficientLoops(),
                penalty: 40,
                description: 'Inefficient loop structures'
            },
            {
                check: () => this.hasExcessiveObjectCreation(),
                penalty: 30,
                description: 'Excessive object creation'
            }
        ];

        performanceChecks.forEach(({ check, penalty, description }) => {
            if (check()) {
                score -= penalty;
                console.warn(`Performance issue: ${description}`);
            }
        });

        return Math.max(score, 0);
    }

    hasUnnecessaryComplexity() {
        // Check if transpiled code is significantly more complex
        const originalLines = this.originalCode.split('\n').filter(line => line.trim()).length;
        const transpiledLines = this.transpiledCode.split('\n').filter(line => line.trim()).length;
        
        return transpiledLines > originalLines * 2; // More than double the lines
    }

    hasInefficientLoops() {
        // Check for inefficient loop patterns
        const inefficientPatterns = [
            /for\s*\(\s*var\s+\w+\s*=\s*0.*\.length.*\+\+/, // traditional for loop when for...of was available
        ];

        return inefficientPatterns.some(pattern => pattern.test(this.transpiledCode)) &&
               /for\s*\(\s*const\s+\w+\s+of/.test(this.originalCode);
    }

    hasExcessiveObjectCreation() {
        // Check for unnecessary object creation patterns
        const objectCreationCount = (this.transpiledCode.match(/new\s+\w+\(/g) || []).length;
        const originalObjectCreationCount = (this.originalCode.match(/new\s+\w+\(/g) || []).length;
        
        return objectCreationCount > originalObjectCreationCount * 2;
    }

    parseCode(code) {
        return parse(code, {
            sourceType: 'module',
            plugins: ['jsx', 'typescript', 'decorators-legacy']
        });
    }

    collectASTNodes(ast) {
        const nodes = [];
        traverse(ast, {
            enter(path) {
                nodes.push({
                    type: path.node.type,
                    name: path.node.name || path.node.key?.name,
                    kind: path.node.kind
                });
            }
        });
        return nodes;
    }

    calculateNodeSimilarity(original, transpiled) {
        if (original.length === 0) {
            return transpiled.length === 0 ? 100 : 0;
        }

        let matches = 0;
        for (const origNode of original) {
            if (transpiled.some(transNode => 
                transNode.type === origNode.type && 
                transNode.name === origNode.name)) {
                matches++;
            }
        }

        return (matches / original.length) * 100;
    }

    calculateWeightedScore(results) {
        return Object.keys(this.weights).reduce((total, key) => {
            return total + (results[key] * this.weights[key]);
        }, 0);
    }

    generateRecommendations(results) {
        const recommendations = [];

        if (results.functionalCorrectness < 70) {
            recommendations.push('🚨 Critical: Fix functional correctness - the transpiled code doesn\'t work properly');
        }

        if (results.semanticEquivalence < 70) {
            recommendations.push('⚠️ Important: Review semantic equivalence - logic may not be preserved');
        }

        if (results.codeQuality < 70) {
            recommendations.push('💡 Consider: Improve code quality and modern JavaScript practices');
        }

        if (results.performancePreservation < 70) {
            recommendations.push('⚡ Consider: Review performance implications of transpilation');
        }

        if (recommendations.length === 0) {
            recommendations.push('✅ Excellent transpilation quality!');
        }

        return recommendations;
    }

    printDetailedResults(results) {
        console.log('\n🔍 Enhanced Transpilation Analysis Results');
        console.log('='.repeat(50));
        
        const categories = [
            ['Functional Correctness', 'functionalCorrectness', 'Does the code work correctly?'],
            ['Semantic Equivalence', 'semanticEquivalence', 'Is the logic preserved?'],
            ['Code Quality', 'codeQuality', 'Is it well-written JavaScript?'],
            ['Syntax Preservation', 'syntaxPreservation', 'Are syntax patterns maintained?'],
            ['Performance Preservation', 'performancePreservation', 'Is efficiency maintained?']
        ];

        categories.forEach(([name, key, description]) => {
            const score = results[key] || 0;
            const weight = this.weights[key] * 100;
            console.log(`${name.padEnd(22)} | ${score.toFixed(1).padStart(6)}/100 | Weight: ${weight.toFixed(1).padStart(4)}% | ${description}`);
        });

        console.log('-'.repeat(50));
        console.log(`${'Overall Score'.padEnd(22)} | ${results.overallScore.toFixed(1).padStart(6)}/100`);
        
        console.log('\n📋 Recommendations:');
        results.recommendations.forEach(rec => console.log(`   ${rec}`));
    }
}

// Usage function
async function analyzeTranspilation(originalPath, transpiledPath) {
    try {
        const originalCode = fs.readFileSync(originalPath, 'utf8');
        const transpiledCode = fs.readFileSync(transpiledPath, 'utf8');
        
        const analyzer = new EnhancedTranspilationAnalyzer(originalCode, transpiledCode);
        const results = await analyzer.analyze();
        
        analyzer.printDetailedResults(results);
        return results;
    } catch (error) {
        console.error('Analysis failed:', error);
        return { overallScore: 0, error: error.message };
    }
}

module.exports = { EnhancedTranspilationAnalyzer, analyzeTranspilation };
                    

// run-precision-analysis.js
const { analyzeTranspilation } = require('./precision-analyzer');

async function runAnalysis() {
    console.log('Analyzing Babel output...');
    const babelResults = await analyzeTranspilation(
        './src/index.ts',      // original file (change between ".js" and ".ts" as needed)
        './dist-babel/index.js' // Babel output
    );

    console.log('\nAnalyzing TypeScript output...');
    const tsResults = await analyzeTranspilation(
        './src/index.ts',      // original file (change between ".js" and ".ts" as needed)
        './dist-ts/index.js'     // TypeScript output
    );

    console.log('\nComparison Results:');
    if (babelResults && babelResults.overallScore !== undefined) {
        console.log('Babel Overall Score:', babelResults.overallScore.toFixed(2));
    } else {
        console.log('Babel analysis failed or did not produce a score.');
    }

    if (tsResults && tsResults.overallScore !== undefined) {
        console.log('TypeScript Overall Score:', tsResults.overallScore.toFixed(2));
    } else {
        console.log('TypeScript analysis failed or did not produce a score.');
    }
}

runAnalysis().catch(error => {
    console.error("Error during analysis:", error);
});
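
For context, here is a sketch of the source file both transpilers receive. The actual src/index.ts is not reproduced here; the following is a hypothetical reconstruction based on the expected inputs and outputs in the test cases above.


// Hypothetical src/index.ts (an assumption, not the article's actual source):
// sums the even numbers in the input and tracks the maximum value.
export function processNumbers(numbers: number[]): { sumEven: number; maxNumber: number } {
    let sumEven = 0;
    let maxNumber = numbers[0];
    for (const num of numbers) {
        if (num % 2 === 0) {
            sumEven += num;
        }
        if (num > maxNumber) {
            maxNumber = num;
        }
    }
    return { sumEven, maxNumber };
}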
                    

Script used for Nim

This script evaluates the three outputs generated with the Nim compiler, which can target C, C++, and JavaScript (the nim c, nim cpp, and nim js backends). It scores all three targets in a single run. Note that several of its inputs, such as the assumed 1.2x slowdown of the JavaScript target and the fixed semantic-equivalence values, are hard-coded estimates rather than measurements.


import times, math

type
  LanguageResult = object
    name: string
    value: float # The computed value from the test operation
    # Scores for each category (0.0 to 1.0)
    functionalCorrectness: float
    semanticEquivalence: float    
    codeQuality: float            # NOTE: Proxied by relative performance
    structuralSimilarity: float   
    errorHandling: float

proc measurePrecision(x: float): int =
  var str = $x
  result = 0
  var foundDot = false
  for c in str:
    if c == '.':
      foundDot = true
      continue
    if foundDot: result += 1
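
# Example: measurePrecision(3.14) returns 2; the proc counts the digits that
# appear after the decimal point in the float's default string form.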

proc calculateScore(lr: LanguageResult): float =
  # Weights for each category
  let weights = (
    functional_correctness: 0.40,
    semantic_equivalence: 0.25,
    code_quality: 0.15,
    structural_similarity: 0.10,
    error_handling: 0.10
  )

  let score = lr.functionalCorrectness * weights.functional_correctness +
              lr.semanticEquivalence * weights.semantic_equivalence +
              lr.codeQuality * weights.code_quality +
              lr.structuralSimilarity * weights.structural_similarity +
              lr.errorHandling * weights.error_handling
  
  result = score * 100.0 # Scale to 0-100
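
# Worked example: functionalCorrectness = 1.0, semanticEquivalence = 0.75,
# codeQuality = 1.0, structuralSimilarity = 0.75, errorHandling = 1.0 gives
# (0.40 + 0.1875 + 0.15 + 0.075 + 0.10) * 100 = 91.25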

proc testOperation(x: float): tuple[c, cpp, js: LanguageResult] =
  let startTime = epochTime()
  
  # Single test case operation
  let testVal = x * x + sqrt(x)  # Example complex operation
  let endTime = epochTime()
  let baseExecTime = endTime - startTime

  # --- Helper values for scoring ---
  let cPrecision = measurePrecision(testVal)
  let cppPrecision = measurePrecision(testVal)
  let jsPrecision = 16 # assume the JS target keeps full double precision (~16 significant digits)

  # Assuming these are known or tested properties of the transpilation/target
  let handlesInfDefault = true
  let handlesNaNDefault = true

  # --- Calculate scores for C ---
  var cFcScore: float
  if cPrecision >= 16: cFcScore = 1.0 
  elif cPrecision <= 0: cFcScore = 0.0
  else: cFcScore = float(cPrecision) / 16.0

  var cCqScore: float # Code Quality proxied by performance
  # Assuming C's execution time for this operation is similar to baseExecTime
  if baseExecTime == 0.0: cCqScore = 1.0 
  else: cCqScore = baseExecTime / baseExecTime # Relative to base (Nim's direct exec)
  cCqScore = min(1.0, max(0.0, cCqScore)) # Clamp between 0 and 1

  result.c = LanguageResult(
    name: "C",
    value: testVal,
    functionalCorrectness: cFcScore,
    semanticEquivalence: 0.75, 
    codeQuality: cCqScore, 
    structuralSimilarity: 0.75, 
    errorHandling: if handlesInfDefault and handlesNaNDefault: 1.0 else: 0.0
  )

  # --- Calculate scores for C++ ---
  var cppFcScore: float
  if cppPrecision >= 16: cppFcScore = 1.0
  elif cppPrecision <= 0: cppFcScore = 0.0
  else: cppFcScore = float(cppPrecision) / 16.0

  var cppCqScore: float # Code Quality proxied by performance
  # Assuming C++'s execution time for this operation is similar to baseExecTime
  if baseExecTime == 0.0: cppCqScore = 1.0
  else: cppCqScore = baseExecTime / baseExecTime 
  cppCqScore = min(1.0, max(0.0, cppCqScore))

  result.cpp = LanguageResult(
    name: "C++",
    value: testVal,
    functionalCorrectness: cppFcScore,
    semanticEquivalence: 0.75, 
    codeQuality: cppCqScore,
    structuralSimilarity: 0.75, 
    errorHandling: if handlesInfDefault and handlesNaNDefault: 1.0 else: 0.0
  )

  # --- Calculate scores for JavaScript ---
  var jsFcScore: float
  if jsPrecision >= 16: jsFcScore = 1.0
  elif jsPrecision <= 0: jsFcScore = 0.0
  else: jsFcScore = float(jsPrecision) / 16.0
  
  let jsExecTime = baseExecTime * 1.2 # assumed ~1.2x slowdown for the JS target
  var jsCqScore: float # Code Quality proxied by performance
  if jsExecTime == 0.0: jsCqScore = 1.0 # If JS time is zero, perfect score
  elif baseExecTime == 0.0: jsCqScore = 1.0 # If base was zero, JS also considered perfect if zero, else it's relative
  else: jsCqScore = baseExecTime / jsExecTime
  jsCqScore = min(1.0, max(0.0, jsCqScore))


  result.js = LanguageResult(
    name: "JavaScript",
    value: testVal,
    functionalCorrectness: jsFcScore,
    semanticEquivalence: 0.70, 
    codeQuality: jsCqScore,
    structuralSimilarity: 0.70, 
    errorHandling: if handlesInfDefault and handlesNaNDefault: 1.0 else: 0.0
  )

when isMainModule:
  # Single test case from 1-6
  let testValue = 3.14159265359  # Pi as test case (between 1-6)
  let results = testOperation(testValue)
  
  echo "Test value: ", testValue
  echo "Calculated operation value: ", results.c.value # Value is same for all here
  echo "--- Scores (0-100) ---"
  echo "C Score: ", calculateScore(results.c)
  echo "  Functional Correctness (0-1): ", results.c.functionalCorrectness
  echo "  Semantic Equivalence (0-1): ", results.c.semanticEquivalence
  echo "  Code Quality (0-1): ", results.c.codeQuality
  echo "  Structural Similarity (0-1): ", results.c.structuralSimilarity
  echo "  Error Handling (0-1): ", results.c.errorHandling
  
  echo "C++ Score: ", calculateScore(results.cpp)
  echo "  Functional Correctness (0-1): ", results.cpp.functionalCorrectness
  echo "  Semantic Equivalence (0-1): ", results.cpp.semanticEquivalence
  echo "  Code Quality (0-1): ", results.cpp.codeQuality
  echo "  Structural Similarity (0-1): ", results.cpp.structuralSimilarity
  echo "  Error Handling (0-1): ", results.cpp.errorHandling

  echo "JS Score: ", calculateScore(results.js)
  echo "  Functional Correctness (0-1): ", results.js.functionalCorrectness
  echo "  Semantic Equivalence (0-1): ", results.js.semanticEquivalence
  echo "  Code Quality (0-1): ", results.js.codeQuality
  echo "  Structural Similarity (0-1): ", results.js.structuralSimilarity
  echo "  Error Handling (0-1): ", results.js.errorHandling
                    

Script used for ClojureScript

This script takes the ClojureScript source and the compiled JavaScript app containing the transpiled code, executes the compiled output in a Node vm sandbox, and scores it across five categories: functional equivalence (30 points), readability (20), performance (20), memory usage (15), and error handling (15), for a total out of 100.


const fs = require('fs');
const path = require('path');
const vm = require('vm'); // Required for sandboxed execution

const TEST_DATA = [1, 2, 3, 4, 5, 6];
const CLOJURE_PATH = '../../src/my_clojurescript_app/core.cljs'; // input (source) file
const JS_PATH = '../../target/testable_app.js'; // transpiled (compiled) output

function compareResults(jsResult, clojureResult) {
    if (!jsResult || !clojureResult) return 0;
    
    try {
        const jsSumEven = jsResult.sum_even;
        const clojureSumEven = clojureResult.sum_even;
        const jsMax = jsResult.max_number;
        const clojureMax = clojureResult.max_number;
        
        const sumMatch = jsSumEven === clojureSumEven ? 0.5 : 0;
        const maxMatch = jsMax === clojureMax ? 0.5 : 0;
        
        return sumMatch + maxMatch;
    } catch (error) {
        console.error('Error comparing results:', error);
        return 0;
    }
}


function readSourceFiles() {
    try {
        const jsCode = fs.readFileSync(path.resolve(__dirname, JS_PATH), 'utf8'); // This is the compiled CLJS
        const clojureCode = fs.readFileSync(path.resolve(__dirname, CLOJURE_PATH), 'utf8'); // This is the source CLJS
        return { jsCode, clojureCode };
    } catch (error) {
        console.error('Error reading source files:', error.message);
        if (error.code === 'ENOENT' && error.path && error.path.includes(path.basename(JS_PATH))) {
             console.error(`Ensure your ClojureScript project has been compiled and the output is at: ${path.resolve(__dirname, JS_PATH)}`);
        }
        process.exit(1);
    }
}

// Helper to execute the target function from compiled code in a sandbox
function executeCompiledProcessNumbers(compiledCljsCode, inputData) {
    try {
        const sandbox = {
            console: { // Provide a minimal console to avoid errors if the script uses it
                log: () => {},
                warn: () => {},
                error: () => {},
                info: () => {},
                debug: () => {},
            },
            module: { exports: {} }, // For scripts that might try to use module.exports
            exports: {},
            performance: global.performance, // Allow access to performance API if used by CLJS
            global: global,
            require: require,
            process: process // compiled CLJS output may reference process
        };
        vm.createContext(sandbox);
        vm.runInContext(compiledCljsCode, sandbox);

        // After ^:export, the function is typically on a namespace object
        // e.g., my_clojurescript_app.core.process_numbers
        if (sandbox.my_clojurescript_app &&
            sandbox.my_clojurescript_app.core &&
            typeof sandbox.my_clojurescript_app.core.process_numbers === 'function') {
            return sandbox.my_clojurescript_app.core.process_numbers(inputData);
        } else {
            console.error("[executeCompiledProcessNumbers] Could not find 'my_clojurescript_app.core.process_numbers'. Check export and namespace.");
            // console.log("Sandbox top-level keys:", Object.keys(sandbox));
            // if(sandbox.my_clojurescript_app) console.log("my_clojurescript_app keys:", Object.keys(sandbox.my_clojurescript_app));
            return null;
        }
    } catch (e) {
        console.error("[executeCompiledProcessNumbers] Error during VM execution:", e);
        return null;
    }
}

function evaluateReadability(jsCode, clojureCode) {
    const metrics = {
        lineCount: jsCode.split('\n').length,
        complexity: (jsCode.match(/if|for|while/g) || []).length,
        nesting: (jsCode.match(/{/g) || []).length,
        longLines: jsCode.split('\n').filter(line => line.length > 80).length,
        comments: (jsCode.match(/\/\/.+|\/\*.+?\*\//g) || []).length
    };
    let score = 20;
    score -= Math.min(5, metrics.complexity * 0.5);
    score -= Math.min(5, metrics.nesting * 0.3);
    score -= Math.min(5, metrics.longLines * 0.2);
    score += Math.min(5, metrics.comments * 0.5);
    return Math.max(0, Math.min(20, score));
}
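
// Worked example: compiled output with 6 branching keywords, 10 opening braces,
// 2 long lines and no comments scores 20 - 3 - 3 - 0.4 + 0 = 13.6 out of 20.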


function evaluatePerformance(compiledCljsCode, input) {
    const start = process.hrtime();
    const result = executeCompiledProcessNumbers(compiledCljsCode, input);
    const duration = process.hrtime(start);
    const executionTime = duration[0] * 1000 + duration[1] / 1e6; // ms

    if (result === null) return 0; // Penalize if execution failed
    if (executionTime === 0) return 20;
    // Target: 10ms for full score, adjust as needed
    return Math.min(20, Math.max(0, 20 * (10 / executionTime)));
}

function evaluateFunctionalEquivalence() {
    const { jsCode: compiledCljsCode } = readSourceFiles();
    const cljsResult = executeCompiledProcessNumbers(compiledCljsCode, TEST_DATA);

    // JavaScript equivalent for comparison
    const jsEquivalentProcessNumbers = (numbers) => {
        if (!Array.isArray(numbers)) return null;
        const sumEven = numbers.filter(n => typeof n === 'number' && n % 2 === 0).reduce((acc, curr) => acc + curr, 0);
        const numericNumbers = numbers.filter(n => typeof n === 'number');
        const maxNumber = numericNumbers.length > 0 ? numericNumbers.reduce((acc, curr) => Math.max(acc, curr), -Infinity) : -Infinity;
        return { sum_even: sumEven, max_number: maxNumber };
    };
    const jsReferenceResult = jsEquivalentProcessNumbers(TEST_DATA);
    
    // console.log("JS Reference Result:", jsReferenceResult);
    // console.log("CLJS Transpiled Result:", cljsResult);

    return Math.min(30, 30 * compareResults(jsReferenceResult, cljsResult));
}

function evaluateMemoryUsage(compiledCljsCode) {
    const beforeMemory = process.memoryUsage().heapUsed;
    const result = executeCompiledProcessNumbers(compiledCljsCode, TEST_DATA);
    const afterMemory = process.memoryUsage().heapUsed;
    const memoryDiff = afterMemory - beforeMemory;

    if (result === null) return 0; // Penalize if execution failed
    // console.log(`Memory Diff: ${memoryDiff} bytes`);
    if (memoryDiff <= 0) return 15; // Max score if memory didn't increase
    // Score decreases as memory usage increases. 500KB usage results in 0 points from this factor.
    return Math.min(15, Math.max(0, 15 * (1 - memoryDiff / 500000)));
}
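
// Worked example: a 250 KB heap increase scores 15 * (1 - 250000 / 500000) = 7.5.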

function evaluateErrorHandling(jsCode) {
    const errorPatterns = ['try', 'catch', 'throw', 'finally'];
    const score = errorPatterns.reduce((acc, pattern) =>
        acc + (jsCode.includes(pattern) ? 3.75 : 0), 0);
    return Math.min(15, score);
}

function generateReport() {
    console.log('=== Code Evaluation Report ===\n');
    console.log(`Test Data: ${TEST_DATA}\n`);

    const { jsCode, clojureCode } = readSourceFiles(); // jsCode is the compiled ClojureScript

    const scores = {
        readability: evaluateReadability(jsCode, clojureCode),
        performance: evaluatePerformance(jsCode, TEST_DATA),
        functionalEquivalence: evaluateFunctionalEquivalence(), // reads and executes the compiled output itself
        memoryUsage: evaluateMemoryUsage(jsCode),
        errorHandling: evaluateErrorHandling(jsCode)
    };

    console.log('Scores:');
    Object.entries(scores).forEach(([category, score]) => {
        // Ensure score is a number before calling toFixed
        const numericScore = typeof score === 'number' ? score : 0;
        const maxPoints = category === 'functionalEquivalence' ? 30 : (category === 'memoryUsage' || category === 'errorHandling' ? 15 : 20);
        console.log(`${category}: ${numericScore.toFixed(2)}/${maxPoints}`);
    });

    const totalScore = Object.values(scores).reduce((a, b) => (typeof a === 'number' ? a : 0) + (typeof b === 'number' ? b : 0), 0);
    console.log(`\nTotal Score: ${totalScore.toFixed(2)}/100`);
}

generateReport();
                    

Script used for Java2Python

This script evaluates Java code transpiled to Python. It executes a suite of test cases against the Python output and compares the results with the expected values.


from __future__ import print_function, division
import ast
import re
import javalang
from javalang.tree import ClassDeclaration, MethodDeclaration
import os
import subprocess
import tempfile
import json
import sys
from typing import Dict, List, Any, Tuple, Optional

class TranspilationEvaluator:
    def __init__(self):
        self.test_cases = []
        self.weights = {
            "functional_correctness": 0.40,  # Most important - does it work?
            "semantic_equivalence": 0.25,    # Logic flow and behavior
            "code_quality": 0.15,            # Pythonic style and best practices
            "structural_similarity": 0.10,   # Basic structure matching
            "error_handling": 0.10           # Robustness and edge cases
        }
    
    def evaluate_transpilation(self, java_file: str, python_file: str, test_cases: Optional[List[Dict]] = None) -> Dict[str, float]:
        """Enhanced evaluation with semantic correctness focus"""
        
        try:
            # Read files
            with open(java_file, 'r') as f:
                java_code = f.read()
            with open(python_file, 'r') as f:
                python_code = f.read()
            
            # Parse files
            java_tree = javalang.parse.parse(java_code)
            python_tree = ast.parse(python_code)
            
        except Exception as e:
            print(f"Error reading/parsing files: {e}")
            return {"overall_score": 0.0}
        
        # Set default test cases if none provided
        if test_cases is None:
            test_cases = self.generate_default_test_cases()
        
        # Evaluation metrics
        scores = {
            "functional_correctness": self.evaluate_functional_correctness(python_code, test_cases),
            "semantic_equivalence": self.evaluate_semantic_equivalence(java_tree, python_tree, java_code, python_code),
            "code_quality": self.evaluate_code_quality(python_tree, python_code),
            "structural_similarity": self.evaluate_structural_similarity(java_tree, python_tree),
            "error_handling": self.evaluate_error_handling(python_code, test_cases)
        }
        
        # Calculate weighted overall score
        overall_score = sum(scores[metric] * self.weights[metric] for metric in self.weights)
        scores["overall_score"] = overall_score
        
        return scores
    
    def generate_default_test_cases(self) -> List[Dict]:
        """Generate comprehensive test cases for the processNumbers method"""
        return [
            # Normal cases
            {"input": [1, 2, 3, 4, 5], "expected_sum_even": 6, "expected_max": 5},
            {"input": [10, 20, 30], "expected_sum_even": 60, "expected_max": 30},
            {"input": [1, 3, 5], "expected_sum_even": 0, "expected_max": 5},
            {"input": [2, 4, 6], "expected_sum_even": 12, "expected_max": 6},
            
            # Edge cases
            {"input": [0], "expected_sum_even": 0, "expected_max": 0},
            {"input": [-1, -2, -3], "expected_sum_even": -2, "expected_max": -1},
            {"input": [100, -50, 25, -30], "expected_sum_even": 50, "expected_max": 100},
            
            # Large numbers
            {"input": [1000000, 999999, 1000001], "expected_sum_even": 1000000, "expected_max": 1000001},
        ]
    
    def evaluate_functional_correctness(self, python_code: str, test_cases: List[Dict]) -> float:
        """Test if the Python code produces correct outputs"""
        try:
            # Create a temporary file to execute the Python code
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as temp_file:
                # Add test runner to the Python code
                test_code = python_code + "\n\n" + self.generate_test_runner(test_cases)
                temp_file.write(test_code)
                temp_file_path = temp_file.name
            
            # Execute the test
            result = subprocess.run([sys.executable, temp_file_path], 
                                  capture_output=True, text=True, timeout=10)
            
            # Clean up
            os.unlink(temp_file_path)
            
            if result.returncode == 0:
                # Parse test results
                output_lines = result.stdout.strip().split('\n')
                if output_lines and output_lines[-1].startswith('TEST_RESULTS:'):
                    results_json = output_lines[-1].replace('TEST_RESULTS:', '')
                    test_results = json.loads(results_json)
                    
                    passed = test_results.get('passed', 0)
                    total = test_results.get('total', len(test_cases))
                    
                    return (passed / total) * 100 if total > 0 else 0
            
            return 0
            
        except Exception as e:
            print(f"Functional correctness test failed: {e}")
            return 0
    
    def generate_test_runner(self, test_cases: List[Dict]) -> str:
        """Generate Python test runner code"""
        return f"""
import json

def run_tests():
    test_cases = {test_cases}
    passed = 0
    total = len(test_cases)
    
    for i, test_case in enumerate(test_cases):
        try:
            numbers = test_case['input']
            expected_sum = test_case['expected_sum_even']
            expected_max = test_case['expected_max']
            
            result = Main.processNumbers(numbers)
            
            if hasattr(result, 'sumEven') and hasattr(result, 'maxNumber'):
                if result.sumEven == expected_sum and result.maxNumber == expected_max:
                    passed += 1
        except Exception as e:
            pass  # Test failed
    
    print(f"TEST_RESULTS:{{'passed': {{passed}}, 'total': {{total}}}}")

if __name__ == "__main__":
    run_tests()
"""
    
    def evaluate_semantic_equivalence(self, java_tree, python_tree, java_code: str, python_code: str) -> float:
        """Evaluate if the logic and behavior are equivalent"""
        score = 0
        max_score = 100
        
        # Check algorithm logic preservation
        logic_score = self.check_algorithm_logic(java_code, python_code)
        score += logic_score * 0.4
        
        # Check data flow and variable usage
        data_flow_score = self.check_data_flow(java_code, python_code)
        score += data_flow_score * 0.3
        
        # Check method behavior equivalence
        method_score = self.check_method_equivalence(java_tree, python_tree)
        score += method_score * 0.3
        
        return min(score, max_score)
    
    def check_algorithm_logic(self, java_code: str, python_code: str) -> float:
        """Check if the core algorithm logic is preserved"""
        score = 0
        
        # Check for key algorithmic patterns
        patterns = [
            (r'num\s*%\s*2\s*==\s*0', 'Even number check'),
            (r'sumEven\s*\+?=\s*num', 'Sum accumulation'),
            (r'num\s*>\s*maxNumber', 'Maximum comparison'),
            (r'maxNumber\s*=\s*num', 'Maximum update'),
        ]
        
        for pattern, description in patterns:
            java_match = bool(re.search(pattern, java_code))
            python_match = bool(re.search(pattern, python_code))
            
            if java_match and python_match:
                score += 25  # Each pattern worth 25 points
        
        return score
    
    def check_data_flow(self, java_code: str, python_code: str) -> float:
        """Check if data flows through the code similarly"""
        # Check variable initialization patterns
        init_patterns = [
            r'sumEven\s*=\s*0',
            r'maxNumber\s*=.*get\(0\)|maxNumber\s*=.*\[0\]'
        ]
        
        score = 0
        for pattern in init_patterns:
            if re.search(pattern, java_code) and re.search(pattern, python_code):
                score += 50
        
        return score
    
    def check_method_equivalence(self, java_tree, python_tree) -> float:
        """Check if methods have equivalent signatures and return patterns"""
        java_methods = self.extract_method_info(java_tree)
        python_methods = self.extract_python_method_info(python_tree)
        
        if not java_methods:
            return 100 if not python_methods else 0
        
        matches = 0
        for java_method in java_methods:
            for python_method in python_methods:
                if java_method['name'] == python_method['name']:
                    matches += 1
                    break
        
        return (matches / len(java_methods)) * 100
    
    def evaluate_code_quality(self, python_tree, python_code: str) -> float:
        """Evaluate Python code quality and adherence to best practices"""
        score = 0
        
        # Check for Pythonic patterns
        if 'for num in numbers:' in python_code:
            score += 20  # Pythonic iteration
        
        # Check proper class structure
        classes = [node for node in ast.walk(python_tree) if isinstance(node, ast.ClassDef)]
        if classes:
            score += 20  # Has proper class structure
        
        # Check for proper method definitions
        methods = [node for node in ast.walk(python_tree) if isinstance(node, ast.FunctionDef)]
        if methods:
            score += 20  # Has proper methods
        
        # Check for return statements
        if 'return ' in python_code:
            score += 20  # Has return statements
        
        # Check variable naming consistency
        if all(var in python_code for var in ['sumEven', 'maxNumber']):
            score += 20  # Maintains variable names
        
        return min(score, 100)
    
    def evaluate_structural_similarity(self, java_tree, python_tree) -> float:
        """Compare class-level structure between the Java and Python sources"""
        java_classes = len([node for _, node in java_tree.filter(ClassDeclaration)])
        python_classes = len([node for node in ast.walk(python_tree) if isinstance(node, ast.ClassDef)])
        
        if java_classes == 0:
            return 100 if python_classes == 0 else 0
        
        return min(100, (python_classes / java_classes) * 100)
    
    def evaluate_error_handling(self, python_code: str, test_cases: List[Dict]) -> float:
        """Evaluate how well the code handles edge cases and errors"""
        score = 0
        
        # Test with edge cases
        edge_case_tests = [
            {"input": [], "should_fail": True},  # Empty list
            {"input": None, "should_fail": True},  # None input
        ]
        
        try:
            # Basic robustness check - does it handle the expected inputs without crashing?
            with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as temp_file:
                test_code = python_code + "\n\n" + """
try:
    result = Main.processNumbers([1, 2, 3])
    print("BASIC_TEST_PASSED")
except Exception as e:
    print(f"BASIC_TEST_FAILED: {e}")
"""
                temp_file.write(test_code)
                temp_file_path = temp_file.name
            
            result = subprocess.run([sys.executable, temp_file_path], 
                                  capture_output=True, text=True, timeout=5)
            os.unlink(temp_file_path)
            
            if "BASIC_TEST_PASSED" in result.stdout:
                score += 50
            
            # Additional points for not having obvious vulnerabilities
            if 'eval(' not in python_code and 'exec(' not in python_code:
                score += 50
                
        except Exception:
            pass
        
        return score
    
    def extract_method_info(self, java_tree) -> List[Dict]:
        """Extract method information from Java AST"""
        methods = []
        for _, node in java_tree.filter(MethodDeclaration):
            methods.append({
                'name': node.name,
                'parameters': len(node.parameters) if node.parameters else 0
            })
        return methods
    
    def extract_python_method_info(self, python_tree) -> List[Dict]:
        """Extract method information from Python AST"""
        methods = []
        for node in ast.walk(python_tree):
            if isinstance(node, ast.FunctionDef) and node.name != '__init__':
                methods.append({
                    'name': node.name,
                    'parameters': len(node.args.args)
                })
        return methods
    
    def print_detailed_results(self, scores: Dict[str, float]):
        """Print detailed evaluation results"""
        print("Enhanced Transpilation Evaluation Results")
        print("=" * 50)
        
        categories = [
            ("Functional Correctness", "functional_correctness", "Does the code work correctly?"),
            ("Semantic Equivalence", "semantic_equivalence", "Is the logic preserved?"),
            ("Code Quality", "code_quality", "Is it well-written Python?"),
            ("Structural Similarity", "structural_similarity", "Does structure match?"),
            ("Error Handling", "error_handling", "Does it handle edge cases?")
        ]
        
        for name, key, description in categories:
            if key in scores:
                weight = self.weights.get(key, 0) * 100
                score = scores[key]
                print(f"{name:20} | {score:6.1f}/100 | Weight: {weight:4.1f}% | {description}")
        
        print("-" * 50)
        print(f"{'Overall Score':20} | {scores.get('overall_score', 0):6.1f}/100")
        
        # Provide recommendations
        self.provide_recommendations(scores)
    
    def provide_recommendations(self, scores: Dict[str, float]):
        """Provide improvement recommendations based on scores"""
        print("\nRecommendations for Improvement:")
        print("-" * 30)
        
        if scores.get('functional_correctness', 0) < 70:
            print("• Critical: Fix functional correctness - the code doesn't produce correct outputs")
        
        if scores.get('semantic_equivalence', 0) < 70:
            print("• Important: Review algorithm logic - the transpiled code doesn't preserve the original behavior")
        
        if scores.get('code_quality', 0) < 70:
            print("• Consider: Improve Python code style and best practices")
        
        if scores.get('error_handling', 0) < 70:
            print("• Consider: Add better error handling for edge cases")

def main():
    evaluator = TranspilationEvaluator()
    
    java_file = "Main.java"
    python_file = "Main.py"
    
    if not os.path.exists(java_file) or not os.path.exists(python_file):
        print(f"Error: Files not found. Make sure {java_file} and {python_file} exist.")
        return
    
    scores = evaluator.evaluate_transpilation(java_file, python_file)
    evaluator.print_detailed_results(scores)

if __name__ == "__main__":
    main()
                    

Script used for jscodeshift

This script follows the same two-file pattern as the first: one file runs the benchmark, and the other calls it and prints both the performance metrics and the evaluation of the transpiled code.


// jscodeshift-benchmark.js
const jscodeshift = require('jscodeshift');
const { performance } = require('perf_hooks');
const fs = require('fs');

const inputCode = fs.readFileSync('input.js', 'utf8'); // input code to be transformed

function transformToTypeScript(source) {
    const j = jscodeshift;
    return j(source)
        .find(j.FunctionDeclaration)
        .forEach(path => {
            path.value.params[0].typeAnnotation = j.tsTypeAnnotation(
                j.tsArrayType(j.tsNumberKeyword())
            );
            path.value.returnType = j.tsTypeAnnotation(
                j.tsTypeLiteral([
                    j.tsPropertySignature(
                        j.identifier('sumEven'),
                        j.tsTypeAnnotation(j.tsNumberKeyword())
                    ),
                    j.tsPropertySignature(
                        j.identifier('maxNumber'),
                        j.tsTypeAnnotation(j.tsNumberKeyword())
                    )
                ])
            );
        })
        .toSource();
}

function createMetricsObject(iterations) {
    // Number.MIN_VALUE is the smallest *positive* double, not the most negative
    // value, so it is unsafe as an initial maximum; use +/-Infinity instead.
    const emptyStats = () => ({
        values: [],
        min: Infinity,
        max: -Infinity,
        mean: 0,
        median: 0
    });
    return {
        iterations,
        executionTime: emptyStats(),
        cpuUsage: emptyStats(),
        memoryUsage: emptyStats()
    };
}

function collectMetrics(iterations = 100) {
    const metrics = createMetricsObject(iterations);

    // Save transpiled output to file
    console.log('Generating TypeScript output file...');
    const tsOutput = transformToTypeScript(inputCode);
    fs.writeFileSync('output.ts', tsOutput, 'utf8');
    console.log('TypeScript output saved to output.ts');

    // Warm-up phase
    console.log('Warming up...');
    for (let i = 0; i < 5; i++) {
        transformToTypeScript(inputCode);
    }

    // Measurement phase
    console.log('Running benchmarks...');
    for (let i = 0; i < iterations; i++) {
        // Clear garbage before each iteration
        if (global.gc) {
            global.gc();
        }

        const startMemory = process.memoryUsage().heapUsed;
        const startCPU = process.cpuUsage();
        const startTime = process.hrtime();

        transformToTypeScript(inputCode);

        const endCPU = process.cpuUsage(startCPU);
        const elapsedTime = process.hrtime(startTime);

        // Calculate CPU usage percentage
        const elapsedSecs = elapsedTime[0] + elapsedTime[1] / 1e9;
        const totalCPUTime = (endCPU.user + endCPU.system) / 1e6; // Convert to seconds
        const cpuPercent = (totalCPUTime / elapsedSecs) * 100;

        metrics.cpuUsage.values.push(cpuPercent);
        metrics.cpuUsage.min = Math.min(metrics.cpuUsage.min, cpuPercent);
        metrics.cpuUsage.max = Math.max(metrics.cpuUsage.max, cpuPercent);

        const startPerfTime = performance.now();

        transformToTypeScript(inputCode);

        const endPerfTime = performance.now();
        const endMemory = process.memoryUsage().heapUsed;

        // Collect metrics
        const executionTime = endPerfTime - startPerfTime;
        const memoryUsed = (endMemory - startMemory) / (1024 * 1024); // Convert to MB

        metrics.executionTime.values.push(executionTime);
        metrics.memoryUsage.values.push(memoryUsed);

        // Update min/max values
        metrics.executionTime.min = Math.min(metrics.executionTime.min, executionTime);
        metrics.executionTime.max = Math.max(metrics.executionTime.max, executionTime);
        metrics.memoryUsage.min = Math.min(metrics.memoryUsage.min, memoryUsed);
        metrics.memoryUsage.max = Math.max(metrics.memoryUsage.max, memoryUsed);
    }

    // Calculate statistics
    for (const metric of ['executionTime', 'cpuUsage', 'memoryUsage']) {
        const sorted = [...metrics[metric].values].sort((a, b) => a - b);
        metrics[metric].median = sorted[Math.floor(sorted.length / 2)];
        metrics[metric].mean = sorted.reduce((a, b) => a + b, 0) / sorted.length;
    }

    return metrics;
}

function printResults(metrics) {
    console.log('\nJSCodeshift Performance Metrics');
    console.log('=============================');
    console.log(`Number of iterations: ${metrics.iterations}`);
    
    const formatMetric = (name, data) => {
        console.log(`\n${name}:`);
        console.log(`  Minimum: ${data.min.toFixed(3)}`);
        console.log(`  Maximum: ${data.max.toFixed(3)}`);
        console.log(`  Mean: ${data.mean.toFixed(3)}`);
        console.log(`  Median: ${data.median.toFixed(3)}`);
    };

    formatMetric('Execution Time (ms)', metrics.executionTime);
    formatMetric('CPU Usage (%)', metrics.cpuUsage);
    formatMetric('Memory Usage (MB)', metrics.memoryUsage);
}

// Run the benchmark (launch Node with --expose-gc so global.gc is available between iterations)
try {
    console.log('Starting JSCodeshift performance benchmark...');
    const metrics = collectMetrics(100);
    printResults(metrics);
} catch (error) {
    console.error('Benchmark failed:', error);
}

module.exports = { transformToTypeScript };    
                    

// Evaluator: scores output.ts against the expected transform
const fs = require('fs');
const { transformToTypeScript } = require('./jscodeshift-benchmark.js');

// Weights for each category
const WEIGHTS = {
  functional_correctness: 0.40,
  semantic_equivalence: 0.25,
  code_quality: 0.15,
  structural_similarity: 0.10,
  error_handling: 0.10
};

// Expected TypeScript output for comparison, regenerated from input.js
const inputCode = fs.readFileSync('input.js', 'utf8');
const expectedOutput = transformToTypeScript(inputCode);


// Normalizes code (removes spacing, newlines, etc.)
function normalizeCode(code) {
  return code
    .replace(/\s+/g, ' ')
    .replace(/[\n\r]/g, '')
    .replace(/\s*([{}():,;])\s*/g, '$1')
    .trim();
}

// Levenshtein distance for similarity
function levenshteinDistance(a, b) {
  const matrix = Array(b.length + 1).fill(null).map(() => Array(a.length + 1).fill(null));
  for (let i = 0; i <= a.length; i++) matrix[0][i] = i;
  for (let j = 0; j <= b.length; j++) matrix[j][0] = j;
  for (let j = 1; j <= b.length; j++) {
    for (let i = 1; i <= a.length; i++) {
      matrix[j][i] = a[i-1] === b[j-1]
        ? matrix[j-1][i-1]
        : Math.min(
            matrix[j-1][i-1] + 1,
            matrix[j][i-1] + 1,
            matrix[j-1][i] + 1
          );
    }
  }
  return matrix[b.length][a.length];
}

// Calculates similarity percentage
function calculateSimilarity(generated, expected) {
  const normGen = normalizeCode(generated);
  const normExp = normalizeCode(expected);
  const longer = normGen.length > normExp.length ? normGen : normExp;
  if (longer.length === 0) return 100;
  const dist = levenshteinDistance(longer, normGen.length > normExp.length ? normExp : normGen);
  return (1 - dist / longer.length) * 100;
}
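
// Worked example: the normalized strings "abcdefghij" and "abcdefgxyj" differ by
// two substitutions, so similarity = (1 - 2 / 10) * 100 = 80%.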

// Main evaluation function
function evaluatePrecision() {
  let generated;
  try {
    generated = fs.readFileSync('output.ts', 'utf8');
  } catch (err) {
    console.error('Could not read output.ts:', err);
    return;
  }

  // Basic metrics
  const similarity = calculateSimilarity(generated, expectedOutput);
  const exactMatch = normalizeCode(generated) === normalizeCode(expectedOutput);

  // 1. Functional correctness (40%) - based on exact match plus valid syntax
  const syntaxOk = analyzeSyntax(generated).passed;
  const functional_correctness = exactMatch && syntaxOk ? 100 : syntaxOk ? similarity : 0;

  // 2. Semantic equivalence (25%) - uses the normalized-code similarity
  const semantic_equivalence = similarity;

  // 3. Code quality (15%) - checks use of let/const over var and basic formatting
  const qualityChecks = {
    noVar: !/\bvar\b/.test(generated),
    usesLetConst: /\b(let|const)\b/.test(generated)
  };
  const code_quality = (Object.values(qualityChecks).filter(v => v).length / Object.keys(qualityChecks).length) * 100;

  // 4. Structural similarity (10%) - percentage of matches on basic structure (functions, loops)
  const structuralChecks = analyzeSyntax(generated).details;
  const structScore = (Object.values(structuralChecks).filter(v => v).length / Object.keys(structuralChecks).length) * 100;
  const structural_similarity = structScore;

  // 5. Error handling (10%) - presença de try/catch
  const error_handling = /try\s*\{/.test(generated) && /catch\s*\(/.test(generated) ? 100 : 0;

  // Weighted overall score
  const overallScore =
    functional_correctness * WEIGHTS.functional_correctness +
    semantic_equivalence * WEIGHTS.semantic_equivalence +
    code_quality * WEIGHTS.code_quality +
    structural_similarity * WEIGHTS.structural_similarity +
    error_handling * WEIGHTS.error_handling;

  const results = {
    functional_correctness: functional_correctness.toFixed(2),
    semantic_equivalence: semantic_equivalence.toFixed(2),
    code_quality: code_quality.toFixed(2),
    structural_similarity: structural_similarity.toFixed(2),
    error_handling: error_handling.toFixed(2),
    overallScore: overallScore.toFixed(2)
  };

  printResults(results);
  return results;
}

// Checks basic syntactic structure
function analyzeSyntax(code) {
  const checks = {
    hasExport: code.includes('export '),
    hasFunction: code.includes('function processNumbers'),
    hasReturn: code.includes('return '),
    hasFor: code.includes('for ('),
    balancedBraces: ((code.match(/{/g)||[]).length === (code.match(/}/g)||[]).length)
  };
  return { passed: Object.values(checks).every(x => x), details: checks };
}

// Results printing
function printResults(res) {
  console.log('\nTypeScript Output Precision Evaluation');
  console.log('===================================');
  console.log(`Functional Correctness: ${res.functional_correctness}%`);
  console.log(`Semantic Equivalence: ${res.semantic_equivalence}%`);
  console.log(`Code Quality: ${res.code_quality}%`);
  console.log(`Structural Similarity: ${res.structural_similarity}%`);
  console.log(`Error Handling: ${res.error_handling}%`);
  console.log(`\nOverall Score: ${res.overallScore}%`);
}

// Run evaluation
evaluatePrecision();

Script used for C2Rust

This script evaluates the C code transpiled to Rust. It checks functional correctness, semantic equivalence, and other metrics, and at the end it also comments on whether the transpilation was excellent, good, or in need of improvement.


use std::fs;
use std::path::Path;
use std::io::{self, Read};
use regex::Regex;

#[derive(Debug)]
struct CodeBases {
    c_source: String,
    rust_transpiled: String,
}

#[derive(Debug)]
struct Evaluation {
    functional_correctness: f32,      // 0.40 weight - Does it work?
    semantic_equivalence: f32,        // 0.25 weight - Logic flow and behavior
    code_quality: f32,               // 0.15 weight - Rust style and best practices
    structural_similarity: f32,       // 0.10 weight - Basic structure matching
    error_handling: f32,             // 0.10 weight - Robustness and edge cases
    total_score: f32,
}

// Weights for final score calculation
const WEIGHTS: [f32; 5] = [0.40, 0.25, 0.15, 0.10, 0.10];
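// Worked example (hypothetical category scores): [80.0, 60.0, 90.0, 0.0, 40.0]
// gives 0.40*80 + 0.25*60 + 0.15*90 + 0.10*0 + 0.10*40 = 64.5
// (structural similarity is currently a 0.0 placeholder, see below).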

fn read_source_files() -> io::Result<CodeBases> {
    let c_path = Path::new("src/original/process_numbers.c");
    let rust_path = Path::new("src/transpiled/process_numbers.rs");
    
    let mut c_source = String::new();
    let mut rust_transpiled = String::new();
    
    fs::File::open(c_path)?.read_to_string(&mut c_source)?;
    fs::File::open(rust_path)?.read_to_string(&mut rust_transpiled)?;
    
    Ok(CodeBases {
        c_source,
        rust_transpiled,
    })
}

fn evaluate_functional_correctness(code: &CodeBases) -> f32 {
    let mut score: f32 = 0.0;
    let max_score = 100.0;
    
    // Check if function signature is preserved and callable
    if code.rust_transpiled.contains("pub unsafe extern \"C\" fn processNumbers") ||
       code.rust_transpiled.contains("pub fn processNumbers") {
        score += 25.0; // Function exists and is public
    }
    
    // Check struct definition equivalence
    let c_struct_fields = extract_struct_fields(&code.c_source, "Results");
    let rust_struct_fields = extract_rust_struct_fields(&code.rust_transpiled, "Results");
    
    if !c_struct_fields.is_empty() && !rust_struct_fields.is_empty() {
        let matching_fields = c_struct_fields.iter()
            .filter(|field| rust_struct_fields.contains(field))
            .count();
        
        if matching_fields == c_struct_fields.len() {
            score += 25.0; // All struct fields preserved
        } else {
            score += (matching_fields as f32 / c_struct_fields.len() as f32) * 25.0;
        }
    }
    
    // Check return type compatibility
    if code.rust_transpiled.contains("-> Results") {
        score += 15.0; // Return type preserved
    }
    
    // Check basic algorithmic logic preservation
    let c_has_loop = code.c_source.contains("for") || code.c_source.contains("while");
    let rust_has_loop = code.rust_transpiled.contains("for") || code.rust_transpiled.contains("while");
    
    if c_has_loop && rust_has_loop {
        score += 20.0; // Loop structure preserved
    }
    
    // Check conditional logic preservation
    let c_conditions = code.c_source.matches("if").count();
    let rust_conditions = code.rust_transpiled.matches("if").count();
    
    if c_conditions > 0 && rust_conditions >= c_conditions {
        score += 15.0; // Conditional logic preserved
    }
    
    score.min(max_score)
}

fn evaluate_semantic_equivalence(code: &CodeBases) -> f32 {
    let mut score: f32 = 0.0;
    let max_score = 100.0;
    
    // Check variable initialization patterns
    if code.c_source.contains("= 0") && code.rust_transpiled.contains("= 0") {
        score += 20.0;
    }
    
    // Check arithmetic operations preservation (%, +, >)
    let c_operations = ["%", "+=", ">"].iter()
        .map(|op| code.c_source.matches(op).count())
        .sum::<usize>();
    let rust_operations = ["%", "+=", ">"].iter()
        .map(|op| code.rust_transpiled.matches(op).count())
        .sum::<usize>();
    
    if c_operations > 0 && rust_operations >= c_operations {
        score += 25.0; // Arithmetic operations preserved
    }
    
    // Check array/pointer access patterns
    let c_array_access = code.c_source.contains("[") && code.c_source.contains("]");
    let rust_has_access = code.rust_transpiled.contains("offset") || 
                         code.rust_transpiled.contains("[") ||
                         code.rust_transpiled.contains("get(");
    
    if c_array_access && rust_has_access {
        score += 20.0;
    }
    
    // Check loop iteration logic
    if code.c_source.contains("i < length") || code.c_source.contains("i++") {
        if code.rust_transpiled.contains("i < length") || 
           code.rust_transpiled.contains("i += 1") {
            score += 20.0;
        }
    }
    
    // Check struct assignment patterns
    if code.c_source.contains("Results results = {") {
        if code.rust_transpiled.contains("Results {") {
            score += 15.0;
        }
    }
    
    score.min(max_score)
}

fn evaluate_code_quality(code: &CodeBases) -> f32 {
    let mut score: f32 = 100.0; // Start high and deduct for poor practices
    let max_score = 100.0;
    
    // Heavy penalty for excessive unsafe usage
    let unsafe_count = code.rust_transpiled.matches("unsafe").count();
    if unsafe_count > 1 {
        score -= 30.0; // Major penalty for unnecessary unsafe
    }
    
    // Penalty for using libc types instead of native Rust types
    let libc_usage = code.rust_transpiled.matches("libc::c_int").count();
    score -= (libc_usage as f32 * 3.0).min(20.0);
    
    // Penalty for raw pointer usage where slices could be used
    if code.rust_transpiled.contains("*mut") {
        score -= 15.0;
    }
    
    // Penalty for C-style loops instead of iterators
    if code.rust_transpiled.contains("while") && !code.rust_transpiled.contains("for") {
        score -= 10.0;
    }
    
    // Penalty for explicit returns (not idiomatic)
    let explicit_returns = code.rust_transpiled.matches("return ").count();
    score -= (explicit_returns as f32 * 5.0).min(10.0);
    
    // Penalty for unnecessary mutability
    let mut_count = code.rust_transpiled.matches("mut ").count();
    if mut_count > 3 { // Some mutability is expected
        score -= ((mut_count - 3) as f32 * 2.0).min(10.0);
    }
    
    // Bonus for good practices
    if code.rust_transpiled.contains("#[derive(") {
        score += 5.0; // Good use of derives
    }
    
    // Penalty for non-standard naming (though C2Rust might require this)
    if code.rust_transpiled.contains("non_snake_case") {
        score -= 5.0;
    }
    
    score.max(0.0).min(max_score)
}

fn evaluate_error_handling(code: &CodeBases) -> f32 {
    let mut score: f32 = 0.0;
    let max_score = 100.0;
    
    // Check for potential buffer overflow protection
    if code.rust_transpiled.contains("get(") || code.rust_transpiled.contains("get_mut(") {
        score += 40.0; // Uses safe indexing
    } else if code.rust_transpiled.contains("offset") {
        score += 10.0; // Uses unsafe but controlled access
    }
    
    // Check for array bounds validation
    if code.rust_transpiled.contains("length") && code.rust_transpiled.contains("<") {
        score += 30.0; // Has bounds checking in loop
    }
    
    // Check for null pointer handling (though C2Rust might not do this)
    if code.rust_transpiled.contains("is_null()") || code.rust_transpiled.contains("Option") {
        score += 20.0;
    }
    
    // Penalty for unchecked arithmetic that could overflow
    if !code.rust_transpiled.contains("checked_") && 
       (code.rust_transpiled.contains("+=") || code.rust_transpiled.contains("*")) {
        score -= 10.0;
    }
    
    // Basic score for maintaining input validation structure
    if code.c_source.contains("length") && code.rust_transpiled.contains("length") {
        score += 10.0;
    }
    
    score.max(0.0).min(max_score)
}

fn evaluate_structural_similarity(code: &CodeBases) -> f32 {
    // TODO: Implement structural similarity evaluation
    // For now, returning a placeholder score
    let score: f32 = 0.0;
    let max_score = 100.0;

    // Example checks (can be expanded):
    // - Compare function counts
    let c_funcs = extract_function_names(&code.c_source).len();
    let rust_funcs = extract_rust_function_names(&code.rust_transpiled).len();
    if c_funcs > 0 && rust_funcs >= c_funcs {
        // score += 20.0;
    }

    // - Compare variable counts (very basic)
    let c_vars = extract_c_variables(&code.c_source).len();
    let rust_vars = extract_rust_variables(&code.rust_transpiled).len();
    if c_vars > 0 && rust_vars >= c_vars {
        // score += 10.0;
    }
    
    score.min(max_score)
}

// Helper functions for pattern extraction
fn extract_struct_fields(code: &str, struct_name: &str) -> Vec<String> {
    let pattern = format!(r"(?s)typedef struct\s*(?:\w*\s*)?\{{\s*([^}}]*)\s*\}}\s*{};", struct_name);
    let regex = Regex::new(&pattern).unwrap();
    
    if let Some(captures) = regex.captures(code) {
        let fields_text = captures.get(1).unwrap().as_str();
        fields_text.lines()
            .filter_map(|line| {
                let trimmed = line.trim();
                if trimmed.is_empty() || trimmed.starts_with("//") {
                    None
                } else {
                    // Extract field name from "type name;"
                    let parts: Vec<&str> = trimmed.split_whitespace().collect();
                    if parts.len() >= 2 {
                        Some(parts[1].trim_end_matches(';').to_string())
                    } else {
                        None
                    }
                }
            })
            .collect()
    } else {
        Vec::new()
    }
}
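// Example (hypothetical C input):
//   typedef struct {
//       int sumEven;
//       int maxNumber;
//   } Results;
// extract_struct_fields(code, "Results") yields vec!["sumEven", "maxNumber"].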

fn extract_rust_struct_fields(code: &str, struct_name: &str) -> Vec<String> {
    let pattern = format!(r"(?s)pub struct {}\s*\{{\s*([^}}]*)\s*\}}", struct_name);
    let regex = Regex::new(&pattern).unwrap();
    
    if let Some(captures) = regex.captures(code) {
        let fields_text = captures.get(1).unwrap().as_str();
        fields_text.lines()
            .filter_map(|line| {
                let trimmed = line.trim();
                if trimmed.starts_with("pub ") && trimmed.contains(":") {
                    let field_name = trimmed.split(":").next()?.trim().strip_prefix("pub ")?.trim();
                    Some(field_name.to_string())
                } else {
                    None
                }
            })
            .collect()
    } else {
        Vec::new()
    }
}

fn extract_function_names(code: &str) -> Vec<String> {
    let regex = Regex::new(r"(\w+)\s+(\w+)\s*\(").unwrap();
    regex.captures_iter(code)
        .filter_map(|cap| {
            let return_type = cap.get(1)?.as_str();
            if return_type != "if" && return_type != "while" && return_type != "for" {
                Some(cap.get(2)?.as_str().to_string())
            } else {
                None
            }
        })
        .collect()
}

fn extract_rust_function_names(code: &str) -> Vec<String> {
    let regex = Regex::new(r"fn\s+(\w+)\s*\(").unwrap();
    regex.captures_iter(code)
        .map(|cap| cap.get(1).unwrap().as_str().to_string())
        .collect()
}

fn extract_c_variables(code: &str) -> Vec<String> {
    let regex = Regex::new(r"\b(?:int|float|double|char)\s+(\w+)").unwrap();
    regex.captures_iter(code)
        .map(|cap| cap.get(1).unwrap().as_str().to_string())
        .collect()
}

fn extract_rust_variables(code: &str) -> Vec<String> {
    let regex = Regex::new(r"let\s+(?:mut\s+)?(\w+)").unwrap();
    regex.captures_iter(code)
        .map(|cap| cap.get(1).unwrap().as_str().to_string())
        .collect()
}
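// Example (hypothetical Rust input): "let mut sum_even = 0;" yields vec!["sum_even"].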

fn evaluate_transpilation() -> Result<Evaluation, io::Error> {
    let code = read_source_files()?;
    
    let functional_correctness = evaluate_functional_correctness(&code);
    let semantic_equivalence = evaluate_semantic_equivalence(&code);
    let code_quality = evaluate_code_quality(&code);
    let structural_similarity = evaluate_structural_similarity(&code);
    let error_handling = evaluate_error_handling(&code);
    
    // Calculate weighted total score
    let scores = [functional_correctness, semantic_equivalence, code_quality, 
                  structural_similarity, error_handling];
    let total_score = scores.iter().zip(WEIGHTS.iter())
        .map(|(score, weight)| score * weight)
        .sum();

    Ok(Evaluation {
        functional_correctness,
        semantic_equivalence,
        code_quality,
        structural_similarity,
        error_handling,
        total_score,
    })
}

fn main() -> io::Result<()> {
    match evaluate_transpilation() {
        Ok(evaluation) => {
            println!("C2Rust Transpilation Evaluation Results:");
            println!("========================================");
            println!("Functional Correctness: {:.1}/100 (Weight: 40%)", evaluation.functional_correctness);
            println!("Semantic Equivalence:   {:.1}/100 (Weight: 25%)", evaluation.semantic_equivalence);
            println!("Code Quality:           {:.1}/100 (Weight: 15%)", evaluation.code_quality);
            println!("Structural Similarity:  {:.1}/100 (Weight: 10%)", evaluation.structural_similarity);
            println!("Error Handling:         {:.1}/100 (Weight: 10%)", evaluation.error_handling);
            println!("----------------------------------------");
            println!("Total Weighted Score:   {:.1}/100", evaluation.total_score);
            
            // Provide interpretation
            if evaluation.total_score >= 90.0 {
                println!("\n🟢 Excellent transpilation quality");
            } else if evaluation.total_score >= 75.0 {
                println!("\n🟡 Good transpilation with minor issues");
            } else if evaluation.total_score >= 60.0 {
                println!("\n🟠 Acceptable but needs improvement");
            } else {
                println!("\n🔴 Poor transpilation quality - manual review recommended");
            }
            
            Ok(())
        },
        Err(e) => {
            eprintln!("Error during evaluation: {}", e);
            Err(e)
        }
    }
}

Script used for Fennel

This script evaluates the Lua code transpiled from Fennel.


import re
from typing import Dict, List, Tuple

class LuaFennelTranspiledCodeEvaluator:
    def __init__(self, lua_code: str, fennel_code: str = ""):
        """
        Initializes the evaluator with the Lua code and optionally the original Fennel code.
        """
        self.lua_code = lua_code
        self.fennel_code = fennel_code # Store fennel code for potential cross-comparison
        self.weights = {
            "functional_correctness": 0.40,
            "semantic_equivalence": 0.25,
            "code_quality_lua": 0.15,
            "structural_similarity_lua": 0.10,
            "error_handling": 0.10
        }

    def _normalize_fennel_name(self, fennel_name: str) -> str:
        """Converts kebab-case Fennel names to snake_case for Lua comparison."""
        return fennel_name.replace("-", "_")

    def evaluate_functional_correctness(self) -> Tuple[float, List[str]]:
        """
        Proxy evaluation for functional correctness of Lua code transpiled from Fennel.
        Checks for basic Lua syntax, function definitions, return statements, loops, and conditionals.
        Score is out of 100.
        """
        comments = []
        score = 0.0
        max_score = 100.0

        # 1. Basic Lua Syntax (Presence of function, end, local) - Max 20 points
        # This is a very high-level check. A real Lua parser would be needed for true syntax validation.
        if re.search(r"local\s+function", self.lua_code) and re.search(r"\bend\b", self.lua_code):
            score += 20
            comments.append("Basic Lua constructs ('local function', 'end') are present.")
        else:
            comments.append("Missing some fundamental Lua constructs like 'local function' or 'end'. This might indicate a major transpilation issue or incomplete code.")

        # 2. Function definitions (specifically `local function name(...)`) - Max 25 points
        # Example: local function process_numbers(numbers)
        if re.search(r"local\s+function\s+\w+\s*\(", self.lua_code):
            score += 25
            comments.append("Standard Lua function definitions (`local function name(...)`) found.")
        else:
            comments.append("Standard Lua function definitions (`local function name(...)`) seem to be missing.")

        # 3. Return statements, especially table returns - Max 25 points
        # Example: return {["sum-even"] = sum_even, ["max-number"] = max_number}
        if re.search(r"return\s*\{", self.lua_code):
            score += 25
            comments.append("Table return statement `return { ... }` found, common for Fennel map-like returns.")
        elif re.search(r"\breturn\b", self.lua_code):
            score += 10 # Partial credit if any return statement is found
            comments.append("A `return` statement is present, but not specifically a table return. Check if this matches Fennel's intent.")
        else:
            comments.append("No `return` statement found. Functions that should produce values might be incomplete.")

        # 4. Loop structure (e.g., `for ... in ipairs`) - Max 15 points
        if re.search(r"for\s+.*?\s+in\s+ipairs\s*\(.*?\)\s*do[\s\S]*?end", self.lua_code):
            score += 15
            comments.append("`for ... in ipairs(...) do ... end` loop structure found, typical for Fennel's `each` on sequences.")
        else:
            comments.append("Expected `for ... in ipairs(...)` loop structure not found. If Fennel code used `each` or loops, this might be missing.")

        # 5. Conditional structure (e.g., `if ... then ... end`) - Max 15 points
        if re.search(r"if\s+.*?\s+then[\s\S]*?end", self.lua_code):
            score += 15
            comments.append("`if ... then ... end` conditional structure found, corresponding to Fennel's `when` or `if`.")
        else:
            comments.append("No `if ... then ... end` conditional structures found. Fennel conditionals might not have been transpiled.")
        
        return min(score, max_score), comments

    def evaluate_semantic_equivalence(self) -> Tuple[float, List[str]]:
        """
        Heuristic evaluation of semantic equivalence between Fennel and transpiled Lua.
        Checks naming conventions, variable declarations, control flow, and data structures.
        Score is out of 100.
        """
        score = 0.0
        comments = []
        max_score = 100.0

        # 1. Function name transpilation (Fennel kebab-case to Lua snake_case) - Max 20 points
        fennel_func_match = re.search(r"\(fn\s+([\w-]+)", self.fennel_code)
        if fennel_func_match:
            fennel_func_name = fennel_func_match.group(1)
            lua_func_name = self._normalize_fennel_name(fennel_func_name)
            if re.search(rf"local\s+function\s+{lua_func_name}\s*\(", self.lua_code):
                score += 20
                comments.append(f"Function name '{fennel_func_name}' (Fennel) correctly transpiled to '{lua_func_name}' (Lua).")
            else:
                comments.append(f"Expected Lua function '{lua_func_name}' (from Fennel '{fennel_func_name}') not found or mismatched.")
        else:
            comments.append("Could not identify main function name in Fennel code for comparison.")

        # 2. Variable declaration and naming (Fennel `var`/`let` to Lua `local`, kebab-case to snake_case) - Max 20 points
        # Example: (var sum-even 0) -> local sum_even = 0
        fennel_vars = re.findall(r"\((?:var|let)\s+([\w-]+)\s+.*?\)", self.fennel_code) # Simplified
        found_var_matches = 0
        if fennel_vars:
            for f_var in fennel_vars:
                l_var = self._normalize_fennel_name(f_var)
                if re.search(rf"local\s+{l_var}\s*=", self.lua_code):
                    found_var_matches +=1
            if found_var_matches > 0:
                 var_score = (found_var_matches / len(fennel_vars)) * 20
                 score += var_score
                 comments.append(f"Found {found_var_matches}/{len(fennel_vars)} Fennel variable declarations correctly transpiled to Lua `local` variables with name convention (e.g., '{fennel_vars[0]}' -> '{self._normalize_fennel_name(fennel_vars[0])}').")
            else:
                comments.append("No direct matches found for Fennel variable declarations (var/let) to Lua `local` variables with expected naming.")
        else:
            comments.append("No `(var ...)` or `(let ...)` declarations found in Fennel code to check.")


        # 3. Loop transpilation (Fennel `each` with `ipairs` to Lua `for ... in ipairs`) - Max 15 points
        if re.search(r"\(each\s+\[.*?\]\s+\(ipairs", self.fennel_code) and \
           re.search(r"for\s+_\w*,\s*\w+\s+in\s+ipairs\s*\(", self.lua_code):
            score += 15
            comments.append("Fennel `(each ... (ipairs ...))` likely transpiled to Lua `for ... in ipairs(...)` correctly.")
        elif re.search(r"\(each", self.fennel_code):
            comments.append("Fennel `(each ...)` found, but corresponding Lua `for ... in ipairs(...)` or similar is not clear.")


        # 4. Conditional transpilation (Fennel `when` or `if` to Lua `if`) - Max 15 points
        fennel_ifs = len(re.findall(r"\((?:when|if)\s+", self.fennel_code))
        lua_ifs = len(re.findall(r"\bif\s+.*?\s+then", self.lua_code))
        if fennel_ifs > 0 and lua_ifs >= fennel_ifs:
            score += 15
            comments.append(f"Conditional structures: Fennel has ~{fennel_ifs}, Lua has ~{lua_ifs}. Appears consistent.")
        elif fennel_ifs > 0 and lua_ifs < fennel_ifs:
            score += 7 # Partial
            comments.append(f"Conditional structures: Fennel has ~{fennel_ifs}, Lua has ~{lua_ifs}. Some conditionals might be missing or transformed differently.")
        elif fennel_ifs == 0 and lua_ifs == 0:
             score +=15 # No conditionals in source, none in target. Consistent.
             comments.append("No conditional structures found in either Fennel or Lua code.")
        else:
            comments.append("Mismatch in conditional structure counts between Fennel and Lua.")


        # 5. Table/Map return (Fennel keywords to Lua string keys) - Max 20 points
        # Example: {:sum-even sum-even} -> {["sum-even"] = sum_even}
        fennel_map_return = re.search(r"\{\s*(:[\w-]+\s+[\w-]+)", self.fennel_code)
        if fennel_map_return:
            # Check for the specific pattern {["key-name"] = key_name}
            # Extract first key from Fennel to check its Lua counterpart
            first_fennel_key_match = re.search(r":([\w-]+)", fennel_map_return.group(1))
            if first_fennel_key_match:
                f_key = first_fennel_key_match.group(1)
                l_key_val_name = self._normalize_fennel_name(f_key)
                if re.search(rf'return\s*\{{.*?\["{f_key}"\]\s*=\s*{l_key_val_name}', self.lua_code):
                    score += 20
                    comments.append(f"Fennel map return (e.g., ':{f_key}') correctly transpiled to Lua table with string key (e.g., '[\"{f_key}\"]').")
                else:
                    comments.append(f"Fennel map return found, but Lua string key pattern (e.g., '[\"{f_key}\"]') for it is not evident or mismatched.")
            else:
                 comments.append("Fennel map return structure found, but could not extract a key for detailed check.")
        else:
            comments.append("No clear Fennel map return `{:key val}` found to check against Lua table return.")

        # 6. Specific function calls (e.g., math.fmod, table.unpack) - Max 10 points
        fmod_ok = False
        unpack_ok = False
        if re.search(r"\(math\.fmod", self.fennel_code) and re.search(r"math\.fmod\s*\(", self.lua_code):
            fmod_ok = True
        if re.search(r"\(table\.unpack", self.fennel_code) and re.search(r"table\.unpack\s*\(", self.lua_code):
            unpack_ok = True
        
        if fmod_ok and unpack_ok:
            score += 10
            comments.append("Key function calls like `math.fmod` and `table.unpack` appear consistently translated.")
        elif fmod_ok or unpack_ok:
            score += 5
            comments.append("Some key function calls (`math.fmod`, `table.unpack`) are translated, but not all expected ones.")
        else:
            comments.append("Expected key function calls (`math.fmod`, `table.unpack`) not clearly translated if present in Fennel.")
            
        return min(score, max_score), comments

    def evaluate_code_quality_lua(self) -> Tuple[float, List[str]]:
        """
        Evaluates Lua code quality: local variable usage, performance hints, ipairs/pairs.
        Score is out of 100.
        """
        comments = []
        current_score = 0.0
        max_score = 100.0

        # A. Local variable usage within functions (Max 50 points)
        # All top-level variables in Fennel modules usually become local in Lua.
        # This check focuses on variables *inside* the main function.
        func_body_match = re.search(r"local\s+function\s+\w+\s*\([\s\S]*?\)([\s\S]*?)return", self.lua_code, re.MULTILINE)
        local_vars_score = 0
        if func_body_match:
            body = func_body_match.group(1)
            # Simple assignments: var = value
            assignments = re.findall(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*[^=]", body)
            # Explicit local declarations in body: local var = value
            local_defs = re.findall(r"\blocal\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=", body)
            
            non_local_assignments = 0
            for assign_var in assignments:
                is_param = re.search(rf"local\s+function\s+\w+\s*\((?:[^)]*?\b{assign_var}\b[^)]*?)\)", self.lua_code) # Check if it's a parameter
                if assign_var not in local_defs and not is_param :
                    # Further check if it's not a known global like 'math' or 'table'
                    if assign_var not in ['math', 'table', 'ipairs', 'pairs', 'string', 'os', 'io', '_G']:
                         non_local_assignments += 1
            
            if not assignments: # No assignments, or all are local/params
                 local_vars_score = 50
                 comments.append("Good: All variables within the function body appear to be explicitly local or parameters.")
            elif non_local_assignments == 0:
                local_vars_score = 50
                comments.append("Excellent: All assigned variables within the function body are explicitly `local` or parameters.")
            else:
                # Penalize based on the ratio of non-local to total assignments
                # This is a heuristic.
                total_assigns_in_body = len(assignments)
                local_vars_score = max(0, (1 - (non_local_assignments / total_assigns_in_body)) * 50 if total_assigns_in_body > 0 else 50)
                comments.append(f"Potential non-local variable usage: {non_local_assignments} out of {total_assigns_in_body} assignments in the function body might not be local. Score contribution: {local_vars_score:.2f}/50.")
                if local_vars_score < 35:
                    comments.append("Review variable scoping: Ensure all variables defined inside functions are `local` unless intentionally global.")
            
            # Specific check for sum_even, max_number from example
            if re.search(r"local\s+sum_even\s*=", body) and re.search(r"local\s+max_number\s*=", body):
                comments.append("'sum_even' and 'max_number' are correctly `local` within the function body.")
                local_vars_score = max(local_vars_score, 45) # Boost if these specific ones are good
            else:
                comments.append("Warning: 'sum_even' or 'max_number' might not be `local` inside the function body in the transpiled code.")

        else:
            local_vars_score = 25 # Cannot fully assess if main function body isn't clearly identified
            comments.append("Could not clearly identify the main function body to deeply assess local variable usage inside it. Partial score given.")
        current_score += min(local_vars_score, 50.0)


        # B. Performance Practices (Max 30 points)
        perf_score = 30.0
        loop_blocks = re.findall(r'(for.*?do.*?end|while.*?do.*?end|repeat.*?until.*?)', self.lua_code, re.DOTALL)
        concat_in_loop = False
        for block in loop_blocks:
            if '..' in block: # String concatenation
                concat_in_loop = True
                break
        if concat_in_loop:
            perf_score -= 15
            comments.append("String concatenation '..' detected, potentially inside a loop. If building strings iteratively, consider `table.concat`.")
        
        if re.search(r'for\s+\w+\s*=\s*1\s*,\s*#', self.lua_code): # Numeric for loop accessing length
            perf_score -= 15
            comments.append("Numeric for loop `for i=1,#table` detected. If table is large and not modified, consider caching its length: `local len = #table; for i=1,len do ... end`.")
        current_score += max(0.0, perf_score)

        # C. Proper use of ipairs for table iteration (Max 20 points)
        # Fennel `(each ... (ipairs ...))` strongly suggests `ipairs` in Lua.
        if re.search(r"\(each\s+\[.*?\]\s+\(ipairs", self.fennel_code):
            if re.search(r'ipairs\s*\(', self.lua_code):
                current_score += 20
                comments.append("`ipairs` is correctly used in Lua, matching Fennel's use for sequence iteration.")
            else:
                comments.append("Fennel used `(ipairs ...)`, but `ipairs` not found in the transpiled Lua loop. This could be a semantic mismatch.")
        elif re.search(r'ipairs\s*\(', self.lua_code): # Lua uses ipairs, Fennel might have used something else or it's implicit
            current_score += 10 # Credit for using ipairs
            comments.append("`ipairs` is used in Lua, good for array/sequence iteration.")
        else:
            comments.append("No `ipairs` usage detected. If iterating sequences, `ipairs` is generally preferred over `pairs` or numeric loops on `#table`.")
        
        return min(current_score, max_score), comments

    def evaluate_structural_similarity_lua(self) -> Tuple[float, List[str]]:
        """
        Evaluates structural similarity to common Fennel-to-Lua transpilation patterns.
        Checks for module return style, function definition style. Score out of 100.
        """
        score = 0.0
        comments = []
        max_score = 100.0

        # 1. Module return style (e.g., `return main_function_name`) - Max 40 points
        fennel_main_func_match = re.search(r"\(fn\s+([\w-]+)", self.fennel_code)
        if fennel_main_func_match:
            lua_main_func_name = self._normalize_fennel_name(fennel_main_func_match.group(1))
            if re.search(rf"return\s+{lua_main_func_name}\s*$", self.lua_code.strip()): # End of file
                score += 40
                comments.append(f"Lua code correctly returns the main transpiled function ('{lua_main_func_name}'), typical for Fennel modules.")
            else:
                comments.append(f"Expected module return `return {lua_main_func_name}` not found at the end of the Lua code.")
        else:
            comments.append("Could not determine main function name from Fennel to check Lua module return structure.")


        # 2. Function definition style (`local function name(...)`) - Max 30 points
        if re.search(r"^local\s+function\s+\w+", self.lua_code.strip(), re.MULTILINE): # Starts with local function
            score += 30
            comments.append("Primary function defined as `local function ...`, which is standard.")
        else:
            comments.append("Primary function does not appear to be defined as `local function ...` at the top level.")

        # 3. Consistent use of `local` for top-level definitions - Max 30 points
        # Check if all top-level assignments are `local` (functions or variables)
        # This is a simplification; complex modules might have other structures.
        non_local_top_level = re.search(r"^(?!\s*local|\s*--|\s*$)\w+\s*=", self.lua_code, re.MULTILINE) # Assignment not starting with local or comment
        if not non_local_top_level:
            score += 30
            comments.append("Good: Top-level definitions appear to use `local`, promoting modularity.")
        else:
            comments.append(f"Potential non-local top-level definition found near: '{non_local_top_level.group(0).strip()}'. Fennel typically transpiles to local Lua definitions.")
            
        return min(score, max_score), comments

    def evaluate_error_handling(self) -> Tuple[float, List[str]]:
        """
        Evaluates error handling practices (pcall, assert, error). Score out of 100.
        """
        score = 0.0
        comments = []
        max_score = 100.0
        max_score_per_item = max_score / 3.0

        if re.search(r'pcall\s*\(', self.lua_code) or re.search(r'xpcall\s*\(', self.lua_code):
            score += max_score_per_item
            comments.append("Protected calls (`pcall` or `xpcall`) are used.")
        else:
            comments.append("No `pcall` or `xpcall` detected. Consider for operations that might fail.")

        if re.search(r'assert\s*\(', self.lua_code):
            score += max_score_per_item
            comments.append("Assertions (`assert`) are used.")
        else:
            comments.append("No `assert` calls detected. Useful for preconditions and validation.")

        if re.search(r'\berror\s*\(', self.lua_code):
            score += max_score_per_item
            comments.append("`error()` calls are used for explicit error throwing.")
        else:
            comments.append("No `error()` calls detected for explicit error throwing.")
            
        return min(score, max_score), comments
        
    def evaluate_all(self) -> Dict:
        """
        Runs all evaluations and returns a comprehensive report including a final weighted score.
        """
        evaluations_results = {
            "functional_correctness": self.evaluate_functional_correctness(),
            "semantic_equivalence": self.evaluate_semantic_equivalence(),
            "code_quality_lua": self.evaluate_code_quality_lua(),
            "structural_similarity_lua": self.evaluate_structural_similarity_lua(),
            "error_handling": self.evaluate_error_handling()
        }
        
        final_weighted_score = 0.0
        detailed_results_output = {}
        
        for category_key, (score_value, comments_list) in evaluations_results.items():
            category_weight = self.weights[category_key]
            final_weighted_score += score_value * category_weight
            detailed_results_output[category_key] = {
                'score': round(score_value, 2),
                'weight': category_weight,
                'weighted_contribution': round(score_value * category_weight, 2),
                'comments': comments_list
            }
            
        return {
            'final_weighted_score': round(final_weighted_score, 2),
            'detailed_results': detailed_results_output
        }

def main():
    # Define file paths
    fennel_file_path = "process-numbers.fnl"  # input code in Fennel
    lua_file_path = "process-numbers.lua"    # transpiled code in Lua

    try:
        with open(fennel_file_path, 'r') as f:
            fennel_code_from_file = f.read()
        with open(lua_file_path, 'r') as f:
            lua_code_from_file = f.read()
    except FileNotFoundError:
        print(f"Error: One or both files not found. Make sure '{fennel_file_path}' and '{lua_file_path}' exist.")
        # Fallback to example code if files are not found, or handle error as preferred
        print("Falling back to internal example code.")
        fennel_code_from_file = """
(fn process-numbers [numbers]
  (var sum-even 0)
  (var max-number (table.unpack numbers 1))

  (each [_ num (ipairs numbers)]
    (when (= (math.fmod num 2) 0)
      (set sum-even (+ sum-even num)))

    (when (> num max-number)
      (set max-number num)))

  {:sum-even sum-even
   :max-number max-number})
"""
        lua_code_from_file = """
local function process_numbers(numbers)
  local sum_even = 0
  local max_number = table.unpack(numbers, 1)
  for _, num in ipairs(numbers) do
    if (math.fmod(num, 2) == 0) then
      sum_even = (sum_even + num)
    else
    end
    if (num > max_number) then
      max_number = num
    else
    end
  end
  return {["sum-even"] = sum_even, ["max-number"] = max_number}
end
return process_numbers
"""
    
    evaluator = LuaFennelTranspiledCodeEvaluator(lua_code_from_file, fennel_code_from_file)
    results = evaluator.evaluate_all()
    
    print("\nFennel-to-Lua Transpiled Code Quality Evaluation Report")
    print("=" * 70)
    print(f"\nOverall Weighted Score: {results['final_weighted_score']}/100\n")
    print("Detailed Analysis:")
    print("-" * 70)
    
    for category, data in results['detailed_results'].items():
        category_title = category.replace('_', ' ').title()
        if "Lua" not in category_title: # Avoid "Code Quality Lua Lua"
             category_title = category_title.replace(" Lua", "")

        print(f"\nCategory: {category_title}")
        print(f"  Score: {data['score']:.2f}/100")
        print(f"  Weight: {data['weight']:.2f}")
        print(f"  Weighted Contribution to Final Score: {data['weighted_contribution']:.2f}")
        if data['comments']:
            print("  Comments:")
            for comment in data['comments']:
                print(f"    - {comment}")
    print("=" * 70)

if __name__ == "__main__":
    main()


Script used for TypeScriptToLua

This script evaluates the Lua code transpiled from TypeScript by TypeScriptToLua, focusing on functional correctness, semantic equivalence, code quality, structural similarity, and error handling.


import re
from typing import Dict, List, Tuple

class LuaTranspiledCodeEvaluator:
    def __init__(self, lua_code: str, typescript_code: str = ""):
        """
        Initializes the evaluator with the Lua code.
        The typescript_code parameter is for potential future use in more advanced comparisons.
        """
        self.lua_code = lua_code
        self.typescript_code = typescript_code 
        self.weights = {
            "functional_correctness": 0.40,
            "semantic_equivalence": 0.25,
            "code_quality_lua": 0.15,
            "structural_similarity_lua": 0.10,
            "error_handling": 0.10
        }

    def evaluate_functional_correctness(self) -> Tuple[float, List[str]]:
        """
        Proxy evaluation for functional correctness.
        Checks for basic syntax, return statements, and essential constructs.
        Score is out of 100.
        """
        comments = []
        score = 0.0

        # 1. Require statements for dependencies (e.g., lualib_bundle) - Max 25 points
        if re.search(r'require\s*\(\s*"lualib_bundle"\s*\)', self.lua_code):
            score += 25
            comments.append("Transpiler helper library 'lualib_bundle' is correctly required.")
        else:
            comments.append("Transpiler helper library 'lualib_bundle' not found. This might be an issue if it's expected from the transpiler (e.g., TypeScriptToLua).")

        # 2. Function definitions, block structure, and return statements - Max 35 points
        functions_found = re.findall(r"function\s+([\w.:]+)\s*\(", self.lua_code)
        earned_function_score = 0

        if not functions_found:
            comments.append("No standard function definitions (e.g., `function name(...)`) found.")
        else:
            # Basic check for block endings `end`. This is a very rough heuristic.
            block_openers = len(re.findall(r'\b(function|if|for|while|repeat)\b', self.lua_code))
            # Count `end` not preceded by `.` (to avoid `object.end`)
            block_enders = len(re.findall(r'(?<!\.)\bend\b', self.lua_code))
            if block_enders >= block_openers and block_openers > 0:
                earned_function_score += 10
                comments.append(f"Basic block structure plausible: {block_openers} openers vs {block_enders} 'end' keywords.")
            elif block_openers > 0:
                comments.append(f"Potential mismatch in block structures: {block_openers} openers vs {block_enders} 'end' keywords. (This is a rough check).")
            
            # Check for return in the specific `Main.processNumbers` function from the example
            process_numbers_func_match = re.search(r"function\s+Main\.processNumbers\s*\([\s\S]*?end", self.lua_code)
            if process_numbers_func_match:
                if re.search(r"\breturn\s*\{", process_numbers_func_match.group(0)):
                    earned_function_score += 25
                    comments.append("`Main.processNumbers` function includes a table return statement `return { ... }`, as expected from the TypeScript example.")
                else:
                    comments.append("`Main.processNumbers` function found, but expected table return statement `return { ... }` is missing or not in the expected format.")
            else:
                comments.append("`Main.processNumbers` function definition not found or not in the expected format.")
        score += min(earned_function_score, 35.0) # Cap points for this section

        # 3. Loop structure (e.g., `for ... in ipairs`) - Max 20 points
        if re.search(r"for\s+.*?\s+in\s+ipairs\s*\(.*?\)\s*do[\s\S]*?end", self.lua_code):
            score += 20
            comments.append("`for ... in ipairs(...) do ... end` loop structure found, good for array iteration.")
        else:
            comments.append("Expected `for ... in ipairs(...)` loop structure not found. Other loop types might be used, or it might be missing if not applicable.")

        # 4. Conditional structure (e.g., `if ... then ... end`) - Max 20 points
        if re.search(r"if\s+.*?\s+then[\s\S]*?end", self.lua_code):
            score += 20
            comments.append("`if ... then ... end` conditional structure found.")
        else:
            comments.append("No `if ... then ... end` conditional structures found.")
        
        return min(score, 100.0), comments

    def evaluate_semantic_equivalence(self) -> Tuple[float, List[str]]:
        """
        Heuristic evaluation of semantic equivalence.
        Checks for transpilation patterns like loop types, conditionals, return types, and indexing.
        Score is out of 100.
        """
        score = 0.0
        comments = []

        # 1. Loop transpilation (e.g., TypeScript `for..of` to Lua `ipairs`) - Max 25 points
        if re.search(r'for\s+_\w*,\s*\w+\s+in\s+ipairs\s*\(', self.lua_code):
            score += 25
            comments.append("Detected `for ... in ipairs(...)` loop, good for TypeScript `for...of` array iteration.")
        else:
            comments.append("`for ... in ipairs(...)` not found. Check if array iteration is handled correctly if applicable.")

        # 2. Conditional statement transpilation - Max 20 points
        if re.search(r'if\s+.+?\s+then[\s\S]+?end', self.lua_code):
            score += 20
            comments.append("`if...then...end` structures are present, indicating conditional logic translation.")
        else:
            comments.append("No `if...then...end` structures detected.")

        # 3. Object/struct return to Lua table return - Max 25 points
        if re.search(r'return\s*\{\s*\w+\s*=\s*\w+(?:,\s*\w+\s*=\s*\w+)*\s*\}', self.lua_code):
            score += 25
            comments.append("Detected `return { key = value, ... }` pattern, good for returning objects/structs from TypeScript.")
        else:
            comments.append("Expected `return { key = value, ... }` pattern for object return not found.")

        # 4. Class method structure maintenance (e.g. `Main.processNumbers`) - Max 15 points
        # Excludes typical constructor names from this specific check
        if re.search(r'function\s+\w+\.\w+\s*\(', self.lua_code) and \
           not re.search(r'\.prototype\.____constructor', self.lua_code) and \
           not re.search(r':new\b', self.lua_code) and \
           not re.search(r'\.new\b', self.lua_code):
            score += 15
            comments.append("General class method structure (e.g., `ClassName.methodName`) seems to be maintained.")
        else:
            comments.append("General class method structure (e.g., `ClassName.methodName`) not clearly detected (aside from typical constructors).")
            
        # 5. Array indexing (Lua is 1-based, TS is 0-based) - Max 15 points
        # Example: `numbers[0]` in TS became `numbers[1]` in the example Lua.
        if re.search(r'numbers\[1\]', self.lua_code) and not re.search(r'numbers\[0\]', self.lua_code):
            score += 15
            comments.append("Detected 1-based array indexing (e.g., `numbers[1]`), correct for Lua if original TS was 0-based.")
        elif re.search(r'numbers\[0\]', self.lua_code):
            comments.append("Warning: Detected 0-based array indexing (e.g., `numbers[0]`). This is incorrect for Lua and may indicate a transpilation error.")
        else:
            comments.append("Specific `numbers[1]` indexing pattern not found; general indexing correctness is crucial but harder to verify broadly with regex.")
            
        return max(0.0, min(100.0, score)), comments

    def evaluate_code_quality_lua(self) -> Tuple[float, List[str]]:
        """
        Evaluates Lua code quality: local variable usage, performance hints, ipairs/pairs.
        Score is out of 100.
        """
        comments = []
        current_score = 0.0

        # A. Local variable usage within functions (Max 50 points)
        func_bodies_matches = re.finditer(r"function\s+[\w.:]+\s*\((.*?)\)([\s\S]*?)end", self.lua_code)
        
        total_assignments_in_funcs = 0
        local_vars_in_funcs_count = 0 
        
        has_functions_with_bodies = False
        for match in func_bodies_matches:
            has_functions_with_bodies = True
            params_str, body = match.groups()
            
            if params_str.strip():
                local_vars_in_funcs_count += len([p for p in params_str.split(',') if p.strip()])

            # Assignments like `var = value` (simplistic regex)
            # Exclude assignments to table fields like `Main.name =` or `self.foo =` from "non-local" penalty here.
            # Focus on simple variable assignments: `sumEven = 0`
            assignments_in_body = re.findall(r"\b([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*[^=]", body)
            # Filter out self.var assignments from being counted as "global" assignments
            potential_non_locals = [a for a in assignments_in_body if not (a.startswith("self.") or "." in a)]
            total_assignments_in_funcs += len(potential_non_locals)
            
            local_defs_in_body = re.findall(r"\blocal\s+([a-zA-Z_][a-zA-Z0-9_]*)\s*=", body)
            local_vars_in_funcs_count += len(local_defs_in_body)
        
        local_ratio_score = 0
        if has_functions_with_bodies:
            if total_assignments_in_funcs > 0 : # Count only assignments that are not explicitly local
                # Effective locals = params + explicit locals
                # Total "assignable" vars = non-local assignments + explicit locals
                
                # Count all variable names that are assigned to, or are parameters.
                all_vars_assigned_or_param = local_vars_in_funcs_count + total_assignments_in_funcs
                if all_vars_assigned_or_param > 0:
                    local_ratio = local_vars_in_funcs_count / float(all_vars_assigned_or_param)
                    local_ratio_score = local_ratio * 50
                else: # No assignments or params found in function bodies
                    local_ratio_score = 50 # Assume good if nothing to check
                comments.append(f"Heuristic local variable usage in functions: {local_vars_in_funcs_count} locals/params out of ~{all_vars_assigned_or_param} potential vars. Score contribution: {local_ratio_score:.2f}/50.")
                if local_ratio_score < 35: # Threshold for comment
                     comments.append("Consider declaring more variables as 'local' within functions for better scoping and potential performance.")
            else: # No non-local assignments found, or only params/locals
                local_ratio_score = 50 
                comments.append("Good: Variables within functions appear to be parameters or explicitly local, or no simple assignments found.")
        else:
            local_ratio_score = 50 # No functions, so this check is not applicable in a penalty way.
            comments.append("No functions found to evaluate local variable usage within them.")

        # Specific check for the example's variables
        if re.search(r"local\s+sumEven\s*=", self.lua_code) and re.search(r"local\s+maxNumber\s*=", self.lua_code):
            comments.append("'sumEven' and 'maxNumber' are correctly declared as local in the example.")
            # Boost score if specific important variables are local, ensuring it reflects this known good practice.
            local_ratio_score = max(local_ratio_score, 40.0) 
        elif has_functions_with_bodies: 
            comments.append("Warning: In the example context, 'sumEven' or 'maxNumber' might not be 'local'. Ensure all function variables are properly scoped.")

        current_score += min(local_ratio_score, 50.0)

        # B. Performance Practices (Max 30 points)
        perf_score = 30.0
        # String concatenation `..` in loops
        loop_blocks = re.findall(r'(for.*?do.*?end|while.*?do.*?end|repeat.*?until.*?)', self.lua_code, re.DOTALL)
        concat_in_loop = False
        for block in loop_blocks:
            if '..' in block:
                concat_in_loop = True
                break
        if concat_in_loop:
            perf_score -= 15
            comments.append("String concatenation '..' detected, potentially inside a loop. If building strings iteratively, consider `table.concat` for better performance.")
        
        if re.search(r'for\s+\w+\s*=\s*1\s*,\s*#', self.lua_code):
            perf_score -= 15
            comments.append("Numeric for loop `for i=1,#table` detected. If table is large and not modified in loop, consider caching its length: `local len = #table; for i=1,len do ... end`.")
        current_score += max(0.0, perf_score)

        # C. Proper use of ipairs/pairs for table iteration (Max 20 points)
        uses_ipairs = bool(re.search(r'ipairs\s*\(', self.lua_code))
        uses_pairs = bool(re.search(r'pairs\s*\(', self.lua_code))
        has_for_loops = "for " in self.lua_code

        iter_score = 0
        if uses_ipairs:
            iter_score += 10
            comments.append("`ipairs` is used, suitable for iterating over sequence-like tables (arrays).")
        if uses_pairs:
            iter_score += 10
            comments.append("`pairs` is used, suitable for iterating over general tables (hash maps).")
        
        if has_for_loops and not uses_ipairs and not uses_pairs:
            comments.append("Loops are present, but neither `ipairs` nor `pairs` detected. Ensure appropriate iterators are used if iterating over tables.")
        elif not has_for_loops: 
             iter_score = 20 
             comments.append("No table iteration loops found, so ipairs/pairs check is not directly applicable here.")
        current_score += min(iter_score, 20.0)
        
        return min(current_score, 100.0), comments

    def evaluate_structural_similarity_lua(self) -> Tuple[float, List[str]]:
        """
        Evaluates structural similarity to common TypeScript-to-Lua transpilation patterns.
        Checks for class/module structure, constructor, method definitions. Score out of 100.
        """
        score = 0.0
        comments = []

        # 1. Transpiler helper library require (e.g., lualib_bundle) - Max 25 points
        if re.search(r'require\s*\(\s*"lualib_bundle"\s*\)', self.lua_code):
            score += 25
            comments.append("Presence of `require(\"lualib_bundle\")` matches common TSTL structure.")
        else:
            comments.append("`require(\"lualib_bundle\")` not found. Structure might differ if another transpiler or no library is used.")

        # 2. Class declaration pattern - Max 25 points
        if re.search(r'\w+\s*=\s*__TS__Class\s*\(\s*\)', self.lua_code): # TSTL specific
            score += 25
            comments.append("Class declaration pattern `ClassName = __TS__Class()` found, typical for TSTL.")
        elif re.search(r'(\w+)\s*=\s*\{\s*\}\s*;?\s*(?:self\.\w+|local\s+\w+)\s*=\s*\1\s*;?\s*\1\.__index\s*=\s*\1', self.lua_code, re.IGNORECASE) or \
             re.search(r'local\s+(\w+)\s*=\s*\{\s*\}\s*;?\s*\1\.__index\s*=\s*\1', self.lua_code): # Common Lua OOP
            score += 15 # Partial points for generic Lua OOP
            comments.append("A common Lua OOP class structure detected (metatable-based).")
        else:
            comments.append("TSTL class declaration pattern `__TS__Class()` not found, nor other obvious simple Lua class patterns.")

        # 3. Constructor pattern - Max 25 points
        if re.search(r'function\s+\w+\.prototype\.____constructor\s*\(self\)', self.lua_code): # TSTL specific
            score += 25
            comments.append("TSTL constructor pattern `function Class.prototype.____constructor(self)` found.")
        elif re.search(r'function\s+\w+[:.]new\s*\(', self.lua_code): # Common Lua constructor (Class:new or Class.new)
            score += 15 # Partial points for generic Lua constructor
            comments.append("Common Lua constructor pattern (e.g., `Class:new` or `Class.new`) detected.")
        else:
            comments.append("TSTL constructor pattern `____constructor` or common Lua `new` method not found.")

        # 4. Method definition pattern - Max 25 points
        # Exclude constructors: the TSTL pattern skips `prototype.____constructor`
        # via a negative lookahead, and plain `new` methods are filtered out below.

        # TSTL-like method: Main.processNumbers(self, ...)
        tstl_method_pattern = r'function\s+\w+\.(?!prototype\b)(\w+)\s*\(self'
        # Lua-like method: Main:processNumbers(...)
        lua_method_pattern = r'function\s+\w+:(\w+)\s*\('

        tstl_methods = re.findall(tstl_method_pattern, self.lua_code)
        lua_methods = [name for name in re.findall(lua_method_pattern, self.lua_code) if name != "new"]

        if tstl_methods:
            score += 25
            comments.append("TSTL-like method definition pattern `function ClassName.methodName(self, ...)` found.")
        elif lua_methods:
            score += 15
            comments.append("Common Lua method definition pattern `function ClassName:methodName(...)` detected.")
        else:
            comments.append("Typical method definition patterns (TSTL `Class.method(self,...)` or Lua `Class:method(...)`) not clearly detected (aside from constructor).")
            
        return min(score, 100.0), comments

    def evaluate_error_handling(self) -> Tuple[float, List[str]]:
        """
        Evaluates error handling practices (pcall, assert, error). Score out of 100.
        """
        score = 0.0
        comments = []
        max_score_per_item = 100.0 / 3.0 # Roughly 33.33 for each

        if re.search(r'pcall\s*\(', self.lua_code) or re.search(r'xpcall\s*\(', self.lua_code):
            score += max_score_per_item
            comments.append("Protected calls (`pcall` or `xpcall`) are used, good for catching errors.")
        else:
            comments.append("No `pcall` or `xpcall` detected. Consider using them for robust error handling where operations might fail.")

        if re.search(r'assert\s*\(', self.lua_code):
            score += max_score_per_item
            comments.append("Assertions (`assert`) are used, good for preconditions, validations, and early error detection.")
        else:
            comments.append("No `assert` calls detected. Assertions can help catch logical issues early during development and testing.")

        if re.search(r'\berror\s*\(', self.lua_code): 
            score += max_score_per_item
            comments.append("`error()` calls are used for explicitly throwing errors.")
        else:
            comments.append("No `error()` calls detected for explicit error throwing when irrecoverable situations occur.")
            
        return min(score, 100.0), comments
        
    def evaluate_all(self) -> Dict:
        """
        Runs all evaluations and returns a comprehensive report including a final weighted score.
        """
        evaluations_results = {
            "functional_correctness": self.evaluate_functional_correctness(),
            "semantic_equivalence": self.evaluate_semantic_equivalence(),
            "code_quality_lua": self.evaluate_code_quality_lua(),
            "structural_similarity_lua": self.evaluate_structural_similarity_lua(),
            "error_handling": self.evaluate_error_handling()
        }
        
        final_weighted_score = 0.0
        detailed_results_output = {}
        
        for category_key, (score_value, comments_list) in evaluations_results.items():
            category_weight = self.weights[category_key]
            final_weighted_score += score_value * category_weight
            detailed_results_output[category_key] = {
                'score': round(score_value, 2),
                'weight': category_weight,
                'weighted_contribution': round(score_value * category_weight, 2),
                'comments': comments_list
            }
            
        return {
            'final_weighted_score': round(final_weighted_score, 2),
            'detailed_results': detailed_results_output
        }

def main():
    lua_code_example = """
local ____lualib = require("lualib_bundle")
local __TS__Class = ____lualib.__TS__Class
Main = __TS__Class()
Main.name = "Main"
function Main.prototype.____constructor(self)
end
function Main.processNumbers(self, numbers)
    local sumEven = 0
    local maxNumber = numbers[1]
    for ____, num in ipairs(numbers) do
        if num % 2 == 0 then
            sumEven = sumEven + num
        end
        if num > maxNumber then
            maxNumber = num
        end
    end
    return {sumEven = sumEven, maxNumber = maxNumber}
end
"""
    
    evaluator = LuaTranspiledCodeEvaluator(lua_code_example)
    results = evaluator.evaluate_all()
    
    print("\nLua Transpiled Code Quality Evaluation Report")
    print("=" * 60)
    print(f"\nOverall Weighted Score: {results['final_weighted_score']}/100\n")
    print("Detailed Analysis:")
    print("-" * 60)
    
    for category, data in results['detailed_results'].items():
        # Turn keys like "code_quality_lua" into display titles, e.g. "Code Quality Lua".
        category_title = category.replace('_', ' ').title()

        print(f"\nCategory: {category_title}")
        print(f"  Score: {data['score']}/100")
        print(f"  Weight: {data['weight']:.2f}")
        print(f"  Weighted Contribution to Final Score: {data['weighted_contribution']:.2f}")
        if data['comments']:
            print("  Comments:")
            for comment in data['comments']:
                print(f"    - {comment}")
    print("=" * 60)

if __name__ == "__main__":
    main()
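
The `main` function above evaluates a hard-coded TSTL sample. To score an arbitrary transpiler output instead, the evaluator can be fed Lua source read from disk. The sketch below is a minimal illustration of that usage (the command-line handling is our assumption, not part of the original script); it would replace the `if __name__ == "__main__"` guard at the end of the module.

import sys

if __name__ == "__main__":
    # Illustrative entry point: pass the path of a transpiled .lua file to
    # score it, or run without arguments to evaluate the built-in sample.
    if len(sys.argv) > 1:
        with open(sys.argv[1], "r", encoding="utf-8") as f:
            lua_source = f.read()
        report = LuaTranspiledCodeEvaluator(lua_source).evaluate_all()
        print(f"Overall Weighted Score: {report['final_weighted_score']}/100")
    else:
        main()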