# agentic-coding-demo/demo-03/employee_data_processor.py

#!/usr/bin/env python3
import csv
import datetime
import html
import json
import re

# Module-level results that process_employee_data() fills in:
# rows that passed validation, and rows that were rejected (name + reason).
processed_records, skipped_records = [], []
# Sum of the annual salaries of the accepted rows.
total_salary = 0
# Count of accepted rows keyed by department display name.
dept_count = dict()

# Department codes accepted in the CSV, mapped to the names shown in reports.
_DEPARTMENT_NAMES = {
    'ENG': 'Engineering',
    'SALES': 'Sales',
    'MKTG': 'Marketing',
    'HR': 'Human Resources',
}

# Columns that every data row is read from.
_REQUIRED_COLUMNS = ('name', 'email', 'department', 'salary', 'hire_date')


def _escape(value):
    """Return *value* as text that is safe to embed in HTML element content."""
    # str() first: a short CSV row yields None for its missing fields.
    return html.escape(str(value))


def _is_valid_email(email):
    """Return True if *email* is ``local@domain`` with a dot in the domain."""
    if not email or email.count('@') != 1:
        return False
    local_part, _, domain = email.partition('@')
    return bool(local_part) and '.' in domain


def _parse_row(row, now):
    """Validate one CSV row and build its report record.

    Checks run in a fixed order (email, salary, department, hire date) and
    stop at the first failure.

    Args:
        row: Mapping produced by ``csv.DictReader`` for one data line.
        now: Reference ``datetime`` used to compute years of experience.

    Returns:
        ``(record, None, None)`` when the row is valid, otherwise
        ``(None, reason, offending_value)``.
    """
    email = row['email']
    if not _is_valid_email(email):
        return None, 'Invalid email', email

    salary = row['salary']
    try:
        salary_amount = int(salary)
    except (TypeError, ValueError):  # None (short row) or non-integer text
        return None, 'Invalid salary', salary
    if salary_amount < 0:
        return None, 'Negative salary', salary_amount

    dept = row['department']
    if dept not in _DEPARTMENT_NAMES:
        return None, 'Invalid department', dept

    hire_date = row['hire_date']
    try:
        hired_on = datetime.datetime.strptime(hire_date, '%Y-%m-%d')
    except (TypeError, ValueError):  # None (short row) or wrong format
        return None, 'Invalid hire_date', hire_date

    record = {
        'name': row['name'],
        'email': email,
        'department': _DEPARTMENT_NAMES[dept],
        # The CSV figure is annualized by multiplying by 12.
        'annual_salary': salary_amount * 12,
        'hire_date': hire_date,
        'years_experience': round((now - hired_on).days / 365.25, 2),
    }
    return record, None, None


def _render_html_report(records, skipped, payroll, by_department):
    """Build the HTML report; every value taken from the CSV is escaped."""
    parts = [
        "<html><head><meta charset=\"utf-8\">"
        "<title>Employee Report</title></head><body>",
        "<h1>Employee Data Report</h1>",
        "<h2>Summary</h2>",
        "<ul>",
        f"<li>Total Employees: {len(records)}</li>",
        f"<li>Skipped Records: {len(skipped)}</li>",
        f"<li>Total Annual Payroll: ${payroll:,}</li>",
        "</ul>",
        "<h2>Employees by Department</h2>",
        "<ul>",
    ]
    parts.extend(
        f"<li>{_escape(name)}: {count}</li>"
        for name, count in by_department.items()
    )
    parts.append("</ul>")

    parts.append("<h2>Employee Details</h2>")
    parts.append("<table border='1' cellpadding='5'>")
    parts.append(
        "<tr><th>Name</th><th>Email</th><th>Department</th>"
        "<th>Annual Salary</th><th>Hire Date</th><th>Years Exp</th></tr>"
    )
    for emp in records:
        parts.append(
            "<tr>"
            f"<td>{_escape(emp['name'])}</td>"
            f"<td>{_escape(emp['email'])}</td>"
            f"<td>{_escape(emp['department'])}</td>"
            f"<td>${emp['annual_salary']:,}</td>"
            f"<td>{_escape(emp['hire_date'])}</td>"
            f"<td>{emp['years_experience']}</td>"
            "</tr>"
        )
    parts.append("</table>")

    parts.append("<h2>Skipped Records</h2>")
    parts.append("<ul>")
    parts.extend(
        f"<li>{_escape(item['name'])}: {_escape(item['reason'])}</li>"
        for item in skipped
    )
    parts.append("</ul>")
    parts.append("</body></html>")
    return "".join(parts)


def process_employee_data(file_path="employees.csv",
                          json_output_path="report.json",
                          html_output_path="report.html"):
    """Validate an employee CSV and write JSON and HTML summary reports.

    Each row needs the columns name, email, department, salary and hire_date.
    Rows that fail validation are recorded in ``skipped_records`` with a
    reason; valid rows are transformed and stored in ``processed_records``.
    The module-level ``processed_records``, ``skipped_records``,
    ``total_salary`` and ``dept_count`` are reset at the start of every call,
    so they always describe the most recent run instead of accumulating
    duplicates across calls.

    Progress, warnings and errors are printed to stdout. Errors are reported
    there rather than raised.

    Args:
        file_path: CSV file to read.
        json_output_path: Destination of the JSON report.
        html_output_path: Destination of the HTML report.
    """
    global total_salary

    # Clear in place so other references to these containers stay valid.
    processed_records.clear()
    skipped_records.clear()
    dept_count.clear()
    total_salary = 0

    try:
        # Only a missing *input* file is reported as "File not found";
        # output-path problems fall through to the generic handler below.
        try:
            # newline='' is what the csv module expects for correct handling
            # of newlines embedded in quoted fields.
            with open(file_path, 'r', newline='') as f:
                reader = csv.DictReader(f)
                rows = list(reader)
                fieldnames = reader.fieldnames or []
        except FileNotFoundError:
            print(f"ERROR: File not found: {file_path}")
            return

        print(f"Reading file: {file_path}")
        print(f"Total rows in CSV: {len(rows)}")

        missing = [col for col in _REQUIRED_COLUMNS if col not in fieldnames]
        if rows and missing:
            print(f"ERROR: Missing required column(s): {', '.join(missing)}")
            return

        # One reference time so every row is measured against the same instant.
        now = datetime.datetime.now()
        for row in rows:
            record, reason, bad_value = _parse_row(row, now)
            if record is None:
                name = row['name']
                print(f"WARNING: {reason} for {name}: {bad_value}")
                skipped_records.append({'name': name, 'reason': reason})
                continue

            processed_records.append(record)
            total_salary += record['annual_salary']
            department = record['department']
            dept_count[department] = dept_count.get(department, 0) + 1

        print(f"\nProcessed {len(processed_records)} valid records")
        print(f"Skipped {len(skipped_records)} invalid records")
        print(f"Total annual payroll: ${total_salary:,}")

        print("\nEmployees by department:")
        for department, count in dept_count.items():
            print(f"  {department}: {count}")

        json_data = {
            'summary': {
                'total_employees': len(processed_records),
                'skipped_records': len(skipped_records),
                'total_annual_payroll': total_salary,
                'employees_by_department': dept_count,
            },
            'employees': processed_records,
            'skipped': skipped_records,
        }
        with open(json_output_path, 'w', encoding='utf-8') as jf:
            json.dump(json_data, jf, indent=2)
        print(f"\nJSON report written to: {json_output_path}")

        html_content = _render_html_report(
            processed_records, skipped_records, total_salary, dept_count
        )
        # Written as UTF-8 to match the charset declared in the document.
        with open(html_output_path, 'w', encoding='utf-8') as hf:
            hf.write(html_content)
        print(f"HTML report written to: {html_output_path}")

        print("\nProcessing complete!")

    except Exception as e:
        # Top-level boundary: report the failure instead of raising.
        print(f"ERROR: Something went wrong: {e}")

# Script entry point: generate the reports only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    process_employee_data()