import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

def _parse_follower_counts(lines):
    """Extract follower counts from lines like "@username has X followers".

    The count is read from the second-to-last whitespace-separated token of
    each line. Lines with fewer than two tokens (e.g. blank lines) or whose
    second-to-last token is not an integer are skipped.
    """
    counts = []
    for line in lines:
        tokens = line.split()
        if len(tokens) < 2:
            # No second-to-last token exists; indexing [-2] would raise
            # IndexError, which the ValueError handler would not catch.
            continue
        try:
            counts.append(int(tokens[-2]))
        except ValueError:
            # Token is not an integer: line doesn't match the expected format.
            continue
    return counts


def _iqr_bounds(counts):
    """Return the (lower, upper) outlier fences at 1.5 * IQR for *counts*."""
    q1, q3 = np.percentile(counts, [25, 75])
    iqr = q3 - q1
    return q1 - 1.5 * iqr, q3 + 1.5 * iqr


def process_follower_counts_and_plot_bell_curve(file_path):
    """Summarise the follower counts in a text file and plot their distribution.

    Each line of the file is expected to look like "@username has X followers";
    lines that do not fit that shape are ignored. The function prints the mean,
    the median, the number of IQR outliers and the upper outlier fence, then
    shows a histogram with a KDE overlay.

    Args:
        file_path: Path of the text file to read.

    Returns:
        None. Results are printed and plotted.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'r') as file:
        follower_counts = _parse_follower_counts(file)

    if not follower_counts:
        # Percentiles and a density plot are undefined for an empty sample,
        # so report the situation instead of failing further down.
        print(f"No valid follower counts found in {file_path}; nothing to plot.")
        return

    mean_followers = np.mean(follower_counts)
    median_followers = np.median(follower_counts)

    # Values outside the 1.5 * IQR fences are counted as outliers.
    lower_bound, upper_bound = _iqr_bounds(follower_counts)
    num_outliers = sum(
        1 for count in follower_counts
        if count < lower_bound or count > upper_bound
    )

    print(f"Mean of follower counts: {mean_followers}")
    print(f"Median of follower counts: {median_followers}")
    print(f"Number of outliers: {num_outliers}")
    print(f"Max value before you're an outlier: {upper_bound}")

    # Plot a bell-curve-like chart of the distribution.
    plt.figure(figsize=(10, 6))
    sns.set(style="whitegrid")
    if hasattr(sns, "histplot"):
        # distplot is deprecated; histplot with a density stat and a KDE
        # overlay is its documented replacement.
        sns.histplot(
            follower_counts,
            kde=True,
            stat="density",
            kde_kws={"cut": 3},
            color="blue",
            bins=100,
        )
    else:
        # Older seaborn releases that predate histplot.
        sns.distplot(follower_counts, kde=True, color="blue", bins=100)
    plt.xlabel('Follower Count')
    plt.ylabel('Density')
    plt.title('Distribution of Follower Counts')
    plt.show()

# Run the analysis on 'reach.txt'; change the path to point at your own data file.
process_follower_counts_and_plot_bell_curve(file_path='reach.txt')
