# Pandas Analysis Example

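An example of sales data analysis with pandas: it generates a year of sample sales records, summarizes them by month, product, and region, and plots the results with Matplotlib.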

## Code Example

```python
# Pandas Data Analysis Example
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Sample data generation
def generate_sample_data():
    """Build a reproducible DataFrame of synthetic daily sales for 2023.

    Seeds NumPy's global random generator with 42, then draws one row per
    calendar day from 2023-01-01 through 2023-12-31.

    Returns:
        pd.DataFrame with columns ``date``, ``product``, ``category``,
        ``sales_amount``, ``quantity``, ``region`` and ``customer_id``.
    """
    np.random.seed(42)
    dates = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
    n_rows = len(dates)

    # The draws below happen in a fixed order; reordering them would change
    # the generated values because they all share the seeded global RNG.
    products = np.random.choice(['Laptop', 'Phone', 'Tablet', 'Monitor'], n_rows)
    categories = np.random.choice(['Electronics', 'Computers', 'Mobile'], n_rows)
    amounts = np.random.normal(1000, 300, n_rows)
    # randint's upper bound is exclusive: quantities fall in 1..9
    quantities = np.random.randint(1, 10, n_rows)
    regions = np.random.choice(['North', 'South', 'East', 'West'], n_rows)
    # customer ids fall in 1000..9998 (upper bound exclusive)
    customers = np.random.randint(1000, 9999, n_rows)

    columns = {
        'date': dates,
        'product': products,
        'category': categories,
        'sales_amount': amounts,
        'quantity': quantities,
        'region': regions,
        'customer_id': customers,
    }
    return pd.DataFrame(columns)

# Data analysis functions
def analyze_sales_trends(df):
    """Summarize sales per calendar month.

    Side effect: writes ``month`` and ``quarter`` columns (derived from
    ``df['date']``) onto the DataFrame that was passed in.

    Args:
        df: DataFrame with a datetime ``date`` column and a numeric
            ``sales_amount`` column.

    Returns:
        DataFrame with one row per month and the columns ``month``,
        ``total_sales``, ``avg_sales`` and ``transaction_count``.
    """
    # Calendar parts are stored on the caller's frame, not on a copy.
    date_parts = df['date'].dt
    df['month'] = date_parts.month
    df['quarter'] = date_parts.quarter

    # Sum, mean and row count of sales_amount for each month number
    summary = df.groupby('month')['sales_amount'].agg(['sum', 'mean', 'count'])
    summary = summary.rename(columns={
        'sum': 'total_sales',
        'mean': 'avg_sales',
        'count': 'transaction_count',
    })

    return summary.reset_index()

def product_performance_analysis(df):
    """Rank products by total revenue.

    Args:
        df: DataFrame with ``product``, ``sales_amount`` and ``quantity``
            columns.

    Returns:
        DataFrame indexed by product, sorted by ``total_revenue`` in
        descending order, with the columns ``total_revenue``, ``avg_sale``,
        ``transaction_count`` and ``total_quantity``. Values are rounded to
        two decimals.
    """
    per_product = df.groupby('product').agg(
        total_revenue=('sales_amount', 'sum'),
        avg_sale=('sales_amount', 'mean'),
        transaction_count=('sales_amount', 'count'),
        total_quantity=('quantity', 'sum'),
    )

    # Round first, then rank, so the ordering is based on the rounded totals
    per_product = per_product.round(2)
    return per_product.sort_values('total_revenue', ascending=False)

def regional_analysis(df):
    """Summarize revenue and customer reach for each region.

    Args:
        df: DataFrame with ``region``, ``sales_amount`` and ``customer_id``
            columns.

    Returns:
        DataFrame indexed by region with the columns ``total_revenue``,
        ``avg_sale``, ``unique_customers`` and ``revenue_share`` (each
        region's percentage of the summed ``total_revenue``). Values are
        rounded to two decimals.
    """
    by_region = df.groupby('region').agg(
        total_revenue=('sales_amount', 'sum'),
        avg_sale=('sales_amount', 'mean'),
        unique_customers=('customer_id', 'nunique'),
    ).round(2)

    # Share is derived from the already-rounded regional totals
    revenue = by_region['total_revenue']
    share_pct = revenue / revenue.sum() * 100
    by_region['revenue_share'] = share_pct.round(2)

    return by_region

def create_visualizations(df, output_path='sales_analysis.png'):
    """Draw a 2x2 dashboard of sales charts, save it, and display it.

    The four panels are: monthly sales totals (line), sales per product
    (horizontal bars), sales share per region (pie), and the distribution
    of individual sale amounts (histogram).

    Args:
        df: DataFrame with a datetime ``date`` column plus ``product``,
            ``region`` and ``sales_amount`` columns.
        output_path: Where the figure is written. Defaults to
            ``'sales_analysis.png'``, the file name used previously.

    Returns:
        None. The figure is saved to ``output_path`` and shown via
        ``plt.show()``.
    """
    # Apply the style only for this figure; plt.style.use() would leave the
    # process-wide matplotlib defaults changed for every later plot.
    with plt.style.context('seaborn-v0_8'):
        fig, axes = plt.subplots(2, 2, figsize=(15, 12))

        # Monthly sales trend
        trend_ax = axes[0, 0]
        monthly_data = df.groupby(df['date'].dt.month)['sales_amount'].sum()
        trend_ax.plot(monthly_data.index, monthly_data.values, marker='o')
        trend_ax.set_title('Monthly Sales Trend')
        trend_ax.set_xlabel('Month')
        trend_ax.set_ylabel('Sales Amount')

        # Product performance (ascending sort puts the best seller on top
        # of the horizontal bar chart)
        product_ax = axes[0, 1]
        product_sales = df.groupby('product')['sales_amount'].sum().sort_values(ascending=True)
        product_ax.barh(product_sales.index, product_sales.values)
        product_ax.set_title('Product Sales Performance')
        product_ax.set_xlabel('Sales Amount')

        # Regional distribution
        region_ax = axes[1, 0]
        regional_sales = df.groupby('region')['sales_amount'].sum()
        region_ax.pie(regional_sales.values, labels=regional_sales.index, autopct='%1.1f%%')
        region_ax.set_title('Sales Distribution by Region')

        # Sales amount distribution
        hist_ax = axes[1, 1]
        hist_ax.hist(df['sales_amount'], bins=30, alpha=0.7, edgecolor='black')
        hist_ax.set_title('Sales Amount Distribution')
        hist_ax.set_xlabel('Sales Amount')
        hist_ax.set_ylabel('Frequency')

        fig.tight_layout()
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
        plt.show()

def generate_report(df):
    """Print a plain-text sales report to standard output.

    The report contains overall totals followed by three sections: top
    products by revenue, regional analysis, and the monthly sales summary.
    The monthly section calls ``analyze_sales_trends``, which adds ``month``
    and ``quarter`` columns to ``df``.

    Args:
        df: DataFrame with ``date``, ``sales_amount``, ``quantity``,
            ``product``, ``region`` and ``customer_id`` columns.
    """
    def _section(title):
        # Blank line, then the title framed by two 30-character rules
        print("\n" + "=" * 30)
        print(title)
        print("=" * 30)

    banner = "=" * 50
    print(banner)
    print("SALES ANALYSIS REPORT")
    print(banner)

    # Headline figures
    print(f"\nTotal Records: {len(df):,}")
    first_day = df['date'].min().strftime('%Y-%m-%d')
    last_day = df['date'].max().strftime('%Y-%m-%d')
    print(f"Date Range: {first_day} to {last_day}")
    amounts = df['sales_amount']
    print(f"Total Revenue: ${amounts.sum():,.2f}")
    print(f"Average Sale: ${amounts.mean():.2f}")
    print(f"Total Quantity Sold: {df['quantity'].sum():,}")

    # Only the first five rows of the product ranking are shown
    _section("TOP PRODUCTS BY REVENUE")
    print(product_performance_analysis(df).head())

    _section("REGIONAL ANALYSIS")
    print(regional_analysis(df))

    _section("MONTHLY SALES SUMMARY")
    print(analyze_sales_trends(df))

# Main execution
if __name__ == "__main__":
    # File the processed data set is exported to
    output_csv = 'sales_data_processed.csv'

    # Build the sample data set
    sales_df = generate_sample_data()

    # Print the text report (this also adds month/quarter columns to the frame)
    generate_report(sales_df)

    # Render and save the charts
    create_visualizations(sales_df)

    # Export the frame, including the columns added during analysis
    sales_df.to_csv(output_csv, index=False)
    print(f"\nAnalysis complete! Data saved to '{output_csv}'")
```

## Files

- analysis.py
- requirements.txt
- data/sales_data.csv
- notebooks/analysis.ipynb

## Usage

```bash
# Install dependencies
pip install -r requirements.txt

# Run the example
python analysis.py
```
