Add demo-03: CSV data processor with intentional code smells
- Add employee_data_processor.py: functional but ugly code with a god function, globals, hardcoded values, and mixed concerns
- Add employees.csv: sample data with 10 records (4 invalid, for testing)
- Generates report.json and report.html output files
- Perfect candidate for a refactoring demo
This commit is contained in:
parent
ae8c09013c
commit
2f2be8e1a0
4 changed files with 265 additions and 0 deletions
172
demo-03/employee_data_processor.py
Normal file
172
demo-03/employee_data_processor.py
Normal file
|
|
@ -0,0 +1,172 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
|
import datetime
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Module-level accumulators filled in by process_employee_data().
# NOTE: nothing resets them, so a second call in the same process adds to the
# results of the first one.
processed_records = []
skipped_records = []
total_salary = 0
dept_count = {}

# Department codes accepted in the CSV, mapped to the names used in reports.
_DEPARTMENT_NAMES = {
    'ENG': 'Engineering',
    'SALES': 'Sales',
    'MKTG': 'Marketing',
    'HR': 'Human Resources',
}


def _validate_row(email, salary, dept, hire_date):
    """Validate the raw fields of one CSV row and parse the typed ones.

    Checks run in a fixed order (email, salary, department, hire date) and
    stop at the first failure, so each rejected row gets exactly one reason.

    Args:
        email: Raw email field; must contain '@' and a '.' in the part
            right after the first '@'.
        salary: Raw salary field; must parse with ``int()`` and be >= 0.
        dept: Raw department code; must be a key of ``_DEPARTMENT_NAMES``.
        hire_date: Raw hire date; must match ``%Y-%m-%d``.

    Returns:
        ``(problem, salary_value, hired_on)``. For a valid row ``problem`` is
        ``None`` and the other two are the parsed ``int`` and ``datetime``.
        For an invalid row ``problem`` is a ``(reason, offending_value)``
        pair and the other two are ``None``.
    """
    if not email or '@' not in email or '.' not in email.split('@')[1]:
        return ('Invalid email', email), None, None

    # TypeError covers a missing field (csv.DictReader fills short rows with
    # None); ValueError covers non-numeric text.
    try:
        salary_value = int(salary)
    except (TypeError, ValueError):
        return ('Invalid salary', salary), None, None

    if salary_value < 0:
        return ('Negative salary', salary_value), None, None

    if dept not in _DEPARTMENT_NAMES:
        return ('Invalid department', dept), None, None

    try:
        hired_on = datetime.datetime.strptime(hire_date, '%Y-%m-%d')
    except (TypeError, ValueError):
        return ('Invalid hire_date', hire_date), None, None

    return None, salary_value, hired_on


def _render_html(employees, skipped, payroll_total, by_department):
    """Return the HTML report as one string.

    Every value that originates from the CSV is HTML-escaped so that markup
    characters in a name or email cannot inject tags into the report.
    """
    from html import escape

    parts = [
        "<html><head><title>Employee Report</title></head><body>",
        "<h1>Employee Data Report</h1>",
        "<h2>Summary</h2>",
        "<ul>",
        f"<li>Total Employees: {len(employees)}</li>",
        f"<li>Skipped Records: {len(skipped)}</li>",
        f"<li>Total Annual Payroll: ${payroll_total:,}</li>",
        "</ul>",
        "<h2>Employees by Department</h2>",
        "<ul>",
    ]
    parts.extend(
        f"<li>{escape(str(dept))}: {count}</li>"
        for dept, count in by_department.items()
    )
    parts.append("</ul>")

    parts.append("<h2>Employee Details</h2>")
    parts.append("<table border='1' cellpadding='5'>")
    parts.append(
        "<tr><th>Name</th><th>Email</th><th>Department</th>"
        "<th>Annual Salary</th><th>Hire Date</th><th>Years Exp</th></tr>"
    )
    for emp in employees:
        parts.append(
            "<tr>"
            f"<td>{escape(str(emp['name']))}</td>"
            f"<td>{escape(str(emp['email']))}</td>"
            f"<td>{escape(str(emp['department']))}</td>"
            f"<td>${emp['annual_salary']:,}</td>"
            f"<td>{escape(str(emp['hire_date']))}</td>"
            f"<td>{emp['years_experience']}</td>"
            "</tr>"
        )
    parts.append("</table>")

    parts.append("<h2>Skipped Records</h2>")
    parts.append("<ul>")
    parts.extend(
        f"<li>{escape(str(item['name']))}: {escape(str(item['reason']))}</li>"
        for item in skipped
    )
    parts.append("</ul>")

    parts.append("</body></html>")
    return "".join(parts)


def process_employee_data(file_path="employees.csv",
                          json_output_path="report.json",
                          html_output_path="report.html"):
    """Read an employee CSV, validate each row, and write JSON and HTML reports.

    The CSV must have the columns ``name``, ``email``, ``department``,
    ``salary`` and ``hire_date``. Rows that fail validation are recorded in
    ``skipped_records`` with a reason; valid rows are appended to
    ``processed_records`` and counted in ``total_salary`` and ``dept_count``.
    Progress and warnings are printed to stdout.

    Args:
        file_path: CSV file to read.
        json_output_path: Where the JSON report is written.
        html_output_path: Where the HTML report is written.

    Returns:
        None. Results are exposed through the module-level accumulators and
        the two report files.

    Errors are not raised to the caller: a missing file or any other
    exception is reported with an ``ERROR:`` line on stdout.
    """
    global processed_records, skipped_records, total_salary, dept_count

    try:
        # newline='' lets the csv module handle embedded line breaks itself.
        with open(file_path, 'r', newline='', encoding='utf-8') as f:
            rows = list(csv.DictReader(f))

        print(f"Reading file: {file_path}")
        print(f"Total rows in CSV: {len(rows)}")

        # One timestamp per run so every row is measured against the same
        # instant.
        now = datetime.datetime.now()

        for row in rows:
            # Read every column up front so a missing header fails the run
            # instead of depending on which validation happens to trip first.
            name = row['name']
            email = row['email']
            dept = row['department']
            salary = row['salary']
            hire_date = row['hire_date']

            problem, sal, hired_on = _validate_row(email, salary, dept, hire_date)
            if problem is not None:
                reason, bad_value = problem
                print(f"WARNING: {reason} for {name}: {bad_value}")
                skipped_records.append({'name': name, 'reason': reason})
                continue

            full_dept = _DEPARTMENT_NAMES[dept]
            # NOTE(review): the CSV salary is multiplied by 12, i.e. treated
            # as a monthly amount - confirm against the data source.
            annual_salary = sal * 12
            years_experience = (now - hired_on).days / 365.25

            processed_records.append({
                'name': name,
                'email': email,
                'department': full_dept,
                'annual_salary': annual_salary,
                'hire_date': hire_date,
                'years_experience': round(years_experience, 2),
            })

            total_salary += annual_salary
            dept_count[full_dept] = dept_count.get(full_dept, 0) + 1

        print(f"\nProcessed {len(processed_records)} valid records")
        print(f"Skipped {len(skipped_records)} invalid records")
        print(f"Total annual payroll: ${total_salary:,}")

        print("\nEmployees by department:")
        for d, count in dept_count.items():
            print(f" {d}: {count}")

        # Generate JSON output
        json_data = {
            'summary': {
                'total_employees': len(processed_records),
                'skipped_records': len(skipped_records),
                'total_annual_payroll': total_salary,
                'employees_by_department': dept_count,
            },
            'employees': processed_records,
            'skipped': skipped_records,
        }

        with open(json_output_path, 'w', encoding='utf-8') as jf:
            json.dump(json_data, jf, indent=2)
        print(f"\nJSON report written to: {json_output_path}")

        # Generate HTML output
        html_content = _render_html(
            processed_records, skipped_records, total_salary, dept_count
        )
        with open(html_output_path, 'w', encoding='utf-8') as hf:
            hf.write(html_content)
        print(f"HTML report written to: {html_output_path}")

        print("\nProcessing complete!")

    except FileNotFoundError as e:
        # Report the path that actually failed; it may be an output path
        # rather than the input CSV.
        print(f"ERROR: File not found: {e.filename}")
    except Exception as e:
        # Top-level boundary of the script: report and stop, do not crash.
        print(f"ERROR: Something went wrong: {e}")


if __name__ == "__main__":
    process_employee_data()
|
||||||
11
demo-03/employees.csv
Normal file
11
demo-03/employees.csv
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
name,email,department,salary,hire_date
|
||||||
|
Alice Smith,alice@example.com,ENG,75000,2023-01-15
|
||||||
|
Bob Jones,bob@example.com,SALES,-50000,2023-02-01
|
||||||
|
Charlie,charlie@bademail,MKTG,65000,2023-03-01
|
||||||
|
Diana Prince,diana@example.com,ENG,82000,2022-05-20
|
||||||
|
Eve Wilson,eve@example.com,HR,58000,2023-04-10
|
||||||
|
Frank Miller,frank@example.com,INVALID,70000,2023-01-25
|
||||||
|
Grace Lee,grace@example.com,SALES,0,2023-06-01
|
||||||
|
Henry Davis,henry@example.com,ENG,90000,invalid_date
|
||||||
|
Ivy Chen,ivy@example.com,MKTG,62000,2023-07-15
|
||||||
|
Jack Brown,jack@example.com,HR,55000,2023-08-01
|
||||||
|
1
demo-03/report.html
Normal file
1
demo-03/report.html
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
<html><head><title>Employee Report</title></head><body><h1>Employee Data Report</h1><h2>Summary</h2><ul><li>Total Employees: 6</li><li>Skipped Records: 4</li><li>Total Annual Payroll: $3,984,000</li></ul><h2>Employees by Department</h2><ul><li>Engineering: 2</li><li>Human Resources: 2</li><li>Sales: 1</li><li>Marketing: 1</li></ul><h2>Employee Details</h2><table border='1' cellpadding='5'><tr><th>Name</th><th>Email</th><th>Department</th><th>Annual Salary</th><th>Hire Date</th><th>Years Exp</th></tr><tr><td>Alice Smith</td><td>alice@example.com</td><td>Engineering</td><td>$900,000</td><td>2023-01-15</td><td>3.0</td></tr><tr><td>Diana Prince</td><td>diana@example.com</td><td>Engineering</td><td>$984,000</td><td>2022-05-20</td><td>3.66</td></tr><tr><td>Eve Wilson</td><td>eve@example.com</td><td>Human Resources</td><td>$696,000</td><td>2023-04-10</td><td>2.77</td></tr><tr><td>Grace Lee</td><td>grace@example.com</td><td>Sales</td><td>$0</td><td>2023-06-01</td><td>2.63</td></tr><tr><td>Ivy Chen</td><td>ivy@example.com</td><td>Marketing</td><td>$744,000</td><td>2023-07-15</td><td>2.51</td></tr><tr><td>Jack Brown</td><td>jack@example.com</td><td>Human Resources</td><td>$660,000</td><td>2023-08-01</td><td>2.46</td></tr></table><h2>Skipped Records</h2><ul><li>Bob Jones: Negative salary</li><li>Charlie: Invalid email</li><li>Frank Miller: Invalid department</li><li>Henry Davis: Invalid hire_date</li></ul></body></html>
|
||||||
81
demo-03/report.json
Normal file
81
demo-03/report.json
Normal file
|
|
@ -0,0 +1,81 @@
|
||||||
|
{
|
||||||
|
"summary": {
|
||||||
|
"total_employees": 6,
|
||||||
|
"skipped_records": 4,
|
||||||
|
"total_annual_payroll": 3984000,
|
||||||
|
"employees_by_department": {
|
||||||
|
"Engineering": 2,
|
||||||
|
"Human Resources": 2,
|
||||||
|
"Sales": 1,
|
||||||
|
"Marketing": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"employees": [
|
||||||
|
{
|
||||||
|
"name": "Alice Smith",
|
||||||
|
"email": "alice@example.com",
|
||||||
|
"department": "Engineering",
|
||||||
|
"annual_salary": 900000,
|
||||||
|
"hire_date": "2023-01-15",
|
||||||
|
"years_experience": 3.0
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Diana Prince",
|
||||||
|
"email": "diana@example.com",
|
||||||
|
"department": "Engineering",
|
||||||
|
"annual_salary": 984000,
|
||||||
|
"hire_date": "2022-05-20",
|
||||||
|
"years_experience": 3.66
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Eve Wilson",
|
||||||
|
"email": "eve@example.com",
|
||||||
|
"department": "Human Resources",
|
||||||
|
"annual_salary": 696000,
|
||||||
|
"hire_date": "2023-04-10",
|
||||||
|
"years_experience": 2.77
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Grace Lee",
|
||||||
|
"email": "grace@example.com",
|
||||||
|
"department": "Sales",
|
||||||
|
"annual_salary": 0,
|
||||||
|
"hire_date": "2023-06-01",
|
||||||
|
"years_experience": 2.63
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Ivy Chen",
|
||||||
|
"email": "ivy@example.com",
|
||||||
|
"department": "Marketing",
|
||||||
|
"annual_salary": 744000,
|
||||||
|
"hire_date": "2023-07-15",
|
||||||
|
"years_experience": 2.51
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Jack Brown",
|
||||||
|
"email": "jack@example.com",
|
||||||
|
"department": "Human Resources",
|
||||||
|
"annual_salary": 660000,
|
||||||
|
"hire_date": "2023-08-01",
|
||||||
|
"years_experience": 2.46
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"skipped": [
|
||||||
|
{
|
||||||
|
"name": "Bob Jones",
|
||||||
|
"reason": "Negative salary"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Charlie",
|
||||||
|
"reason": "Invalid email"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Frank Miller",
|
||||||
|
"reason": "Invalid department"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Henry Davis",
|
||||||
|
"reason": "Invalid hire_date"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue