# prompt: write code which has "team1" = "RCB", "team2" = "PBKS", "Season" = "IPL-2025" and "date" = "6/3/2025" using the deliveries_2008_2023 and matches_2008_2023 dataset which has the similar code structure as "IPL_rcb_vs_dc_analysis_2024.ipynb"

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the ball-by-ball and match-level datasets
deliveries_df = pd.read_csv('deliveries_2008_2025.csv')
matches_df = pd.read_csv('matches_2008_2025.csv')

# Define the specific match details
team1 = "RCB"
team2 = "PBKS"
season = "IPL-2025"
# NOTE: matches_df['date'] holds raw text written in several layouts
# (e.g. '22/03/2025', '4/1/2025', '13-04-2025'), and the lookup below is an
# exact string comparison, so this value must be spelled exactly like the
# row being looked up.
date = "6/3/2025"

# Find the specific match in the matches dataset
match_filter = (
    (matches_df['team1'] == team1)
    & (matches_df['team2'] == team2)
    & (matches_df['Season'] == season)
    & (matches_df['date'] == date)
)
matching_matches = matches_df[match_filter]

# Fail with a readable message instead of the bare IndexError that
# `.iloc[0]` raises on an empty selection.
if matching_matches.empty:
    raise ValueError(
        f"No match found for {team1} vs {team2} on {date} in {season}"
    )

# If several rows match, the first one is used.
match_id = matching_matches['match_id'].iloc[0]

# Show every distinct spelling of the match date stored in the dataset;
# useful for checking how `date` must be written for the lookup.
unique_match_dates = matches_df['date'].unique()
print("Unique dates in matches_df:", unique_match_dates)

Unique dates in matches_df: ['22/03/2025' '23/03/2025' '24/03/2025' '25/03/2025' '26/03/2025'
 '27/03/2025' '28/03/2025' '29/03/2025' '30/03/2025' '31/03/2025'
 '4/1/2025' '4/2/2025' '4/3/2025' '4/4/2025' '4/5/2025' '4/6/2025'
 '4/7/2025' '4/8/2025' '4/9/2025' '4/10/2025' '4/11/2025' '4/12/2025'
 '13-04-2025' '14-04-2025' '15-04-2025' '16-04-2025' '17-04-2025'
 '18-04-2025' '19-04-2025' '20-04-2025' '21-04-2025' '22-04-2025'
 '23-04-2025' '24-04-2025' '25-04-2025' '26-04-2025' '27-04-2025'
 '28-04-2025' '29-04-2025' '30-04-2025' '5/1/2025' '5/2/2025' .................]

# Filter the deliveries dataset for the specific match.
# `.copy()` makes the result an independent DataFrame: later cells add
# columns to it, and doing that on a plain boolean-mask slice triggers
# pandas' SettingWithCopyWarning.
match_deliveries_df = deliveries_df[deliveries_df['match_id'] == match_id].copy()

print(match_deliveries_df.head())

       match_id  Season  innings batting_team bowling_team  over  ball  \
16994    202574    2025        1          RCB         PBKS     1     1   
16995    202574    2025        1          RCB         PBKS     1     1   
16996    202574    2025        1          RCB         PBKS     1     2   
16997    202574    2025        1          RCB         PBKS     1     3   
16998    202574    2025        1          RCB         PBKS     1     4   

         batsman          bowler  wide_runs  bye_runs  noball_runs  \
16994  Phil Salt  Arshdeep Singh          1         0            0   
16995  Phil Salt  Arshdeep Singh          0         0            0   
16996  Phil Salt  Arshdeep Singh          0         0            0   
16997  Phil Salt  Arshdeep Singh          0         0            0   
16998  Phil Salt  Arshdeep Singh          0         0            0   

       batsman_runs  extras_runs  total_runs player_dismissed dismissed_by  
16994             0            1           1              NaN          NaN  
16995             0            0           0              NaN          NaN  
16996             0            0           0              NaN          NaN  
16997             6            0           6              NaN          NaN  
16998             2            0           2              NaN          NaN

# Find the match_id for the configured fixture (team1 vs team2 on `date`
# in `season`). The lookup is an exact string match on every column.
match_row = matches_df[
    (matches_df['team1'] == team1)
    & (matches_df['team2'] == team2)
    & (matches_df['Season'] == season)
    & (matches_df['date'] == date)
]

# Report the fixture that was actually searched for, so a failed lookup
# points at the values that need fixing.
if match_row.empty:
    raise ValueError(
        f"Match not found for {team1} vs {team2} on {date} in {season}"
    )

# If several rows match, the first one is used.
match_id = match_row.iloc[0]['match_id']

sns.set_style("whitegrid")

# Total runs scored in every over, for every match / innings / batting team
over_keys = ['match_id', 'innings', 'batting_team', 'over']
run_distribution = deliveries_df.groupby(over_keys, as_index=False)['total_runs'].sum()

# Keep only the selected match and expose 'batting_team' as 'team' for the plot legend
is_selected_match = run_distribution['match_id'] == match_id
match_run_distribution = run_distribution[is_selected_match].rename(columns={'batting_team': 'team'})

# One line per team: runs scored in each over
plt.figure(figsize=(14, 6))
sns.lineplot(
    data=match_run_distribution,
    x='over',
    y='total_runs',
    hue='team',
    marker='o',
)
plt.title(f'Run Distribution Per Over for Match ID: {match_id}')
plt.xlabel('Over Number')
plt.ylabel('Runs Scored')
plt.xticks(range(0, 21))  # tick marks at 0 through 20
plt.legend(title='Team')
plt.show()

# Runs off the bat per batter (grouped with their team), highest first
top_scorers = (
    match_deliveries_df
    .groupby(['batting_team', 'batsman'], as_index=False)['batsman_runs']
    .sum()
    .sort_values(by='batsman_runs', ascending=False)
)

# Horizontal bars, one per batter, coloured by team
plt.figure(figsize=(14, 8))
sns.barplot(
    data=top_scorers,
    x='batsman_runs',
    y='batsman',
    hue='batting_team',
    dodge=False,
)
plt.title('Top Scorers from Each Team')
plt.xlabel('Total Runs')
plt.ylabel('Batter')
plt.legend(title='Team', loc='center right')
plt.show()

# Notebook display of the full table
top_scorers

# preparing data for bowling analysis
# Ensure 'wickets_taken' exists in match_deliveries_df: 1 on every delivery
# where a player was dismissed, 0 otherwise.
# `assign` returns a new DataFrame, so rebinding the name avoids writing
# into a slice of deliveries_df (the source of SettingWithCopyWarning).
if 'wickets_taken' not in match_deliveries_df.columns:
    match_deliveries_df = match_deliveries_df.assign(
        wickets_taken=match_deliveries_df['player_dismissed'].notna().astype(int)
    )

# Per-bowler totals for the match.
# NOTE(review): 'total_runs' is summed as-is, so any byes recorded on a
# bowler's deliveries are charged to that bowler, and every dismissal on a
# bowler's delivery counts as their wicket -- confirm that is intended.
# 'over' becomes the number of distinct overs the bowler appears in, so a
# partly bowled over counts as a whole one.
bowling_stats = (
    match_deliveries_df
    .groupby(['bowling_team', 'bowler'])
    .agg({
        'total_runs': 'sum',
        'wickets_taken': 'sum',
        'over': 'nunique',
    })
    .reset_index()
)

bowling_stats

C:\Users\DELL\AppData\Local\Temp\ipykernel_25608\3737641703.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match_deliveries_df['wickets_taken'] = match_deliveries_df['player_dismissed'].notna().astype(int)

# Economy rate = runs conceded divided by the number of overs recorded for
# the bowler; computed only once, so re-running the cell keeps the column.
if 'economy_rate' not in bowling_stats.columns:
    runs_conceded = bowling_stats['total_runs']
    overs_bowled = bowling_stats['over']
    bowling_stats['economy_rate'] = runs_conceded / overs_bowled

# Most successful bowlers first
bowling_stats_sorted = bowling_stats.sort_values(by='wickets_taken', ascending=False)

fig, ax1 = plt.subplots(figsize=(14, 8))

# Left axis: wickets as bars, coloured by bowling team
sns.barplot(
    data=bowling_stats_sorted,
    x='bowler',
    y='wickets_taken',
    hue='bowling_team',
    ax=ax1,
    alpha=0.6,
)
ax1.set_ylabel('Wickets Taken')
ax1.set_xlabel('Bowler')
ax1.set_title('Bowling Analysis: Wickets and Economy Rate')
ax1.legend(title='Team', bbox_to_anchor=(1.05, 1), loc='upper left')

# Tilt the bowler names so they do not overlap
for tick_label in ax1.get_xticklabels():
    tick_label.set_rotation(45)

# Right axis: economy rate as a line, in the same bowler order (sort=False)
ax2 = ax1.twinx()
sns.lineplot(
    data=bowling_stats_sorted,
    x='bowler',
    y='economy_rate',
    marker='o',
    sort=False,
    ax=ax2,
    color='black',
)
ax2.set_ylabel('Economy Rate')

plt.tight_layout()
plt.show()

# Count how often each value appears in 'dismissed_by' (rows without a
# dismissal are dropped first).
# NOTE(review): the chart title treats these values as bowlers -- confirm
# that 'dismissed_by' holds the bowler's name.
recorded_dismissals = match_deliveries_df['dismissed_by'].dropna()
dismissal_types = recorded_dismissals.value_counts()

plt.figure(figsize=(8, 8))
plt.pie(
    dismissal_types,
    labels=dismissal_types.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=sns.color_palette("Set2"),
)
plt.title('Wickets Percentage By Each Bowler')
plt.show()

# function to calculate partnerships for a match
def calculate_partnerships(df):
    """Split a match's deliveries into batting partnerships.

    Rows are read in the order given. A partnership starts on the first
    delivery after the previous one ended and ends when a wicket falls
    (``player_dismissed`` is not null) or when the batting side changes, so
    a stand that is unbeaten at the end of one innings is never merged into
    the next innings.

    Parameters
    ----------
    df : pandas.DataFrame
        Deliveries with the columns ``batting_team``, ``batsman``,
        ``total_runs`` and ``player_dismissed``. If an ``innings`` column is
        present, a change in its value also ends the partnership.

    Returns
    -------
    list of dict
        One dict per partnership, in order, with the keys ``team``,
        ``batter1`` (striker on the partnership's first delivery),
        ``batter2`` (the next different striker seen, or ``None`` if nobody
        else faced a delivery), ``runs`` (sum of ``total_runs``) and
        ``balls`` (number of rows, extras included).
    """
    # The dataset has no non-striker column, so the second batter is taken
    # to be the next different striker seen within the partnership.
    if 'innings' in df.columns:
        innings_values = df['innings']
    else:
        innings_values = [None] * len(df)

    partnerships = []
    current = None
    current_key = None

    deliveries = zip(
        innings_values,
        df['batting_team'],
        df['batsman'],
        df['total_runs'],
        df['player_dismissed'],
    )
    for innings, team, striker, runs, dismissed in deliveries:
        key = (innings, team)

        # A new innings / batting side closes any partnership still open.
        if current is not None and key != current_key:
            partnerships.append(current)
            current = None

        if current is None:
            current = {
                'team': team,
                'batter1': striker,
                'batter2': None,
                'runs': 0,
                'balls': 0,
            }
            current_key = key
        elif current['batter2'] is None and striker != current['batter1']:
            current['batter2'] = striker

        current['runs'] += int(runs)
        current['balls'] += 1

        # The delivery on which a wicket falls still belongs to the
        # partnership it ends.
        if pd.notna(dismissed):
            partnerships.append(current)
            current = None

    # Unbeaten partnership at the end of the data
    if current is not None:
        partnerships.append(current)
    return partnerships

# Build the partnership table for the selected match
partnerships_data = calculate_partnerships(match_deliveries_df)
partnerships_df = pd.DataFrame(partnerships_data)

# Keep partnerships worth more than 20 runs, largest first
significant_partnerships = (
    partnerships_df
    .loc[partnerships_df['runs'] > 20]
    .sort_values(by='runs', ascending=False)
)

# One bar per partnership, labelled by the batter who faced its first ball
plt.figure(figsize=(12, 8))
sns.barplot(
    data=significant_partnerships,
    x='runs',
    y='batter1',
    hue='team',
    dodge=False,
)
plt.title('Significant Batting Partnerships')
plt.xlabel('Runs Scored')
plt.ylabel('Batter 1 (Partnership Initiated)')
plt.legend(title='Team')
plt.show()

# function to classify the phase of the game based on the over number
def classify_phase(over):
    """Return the match phase for a 1-based over number.

    The deliveries data numbers overs from 1 (the first ball of the innings
    is over 1), so the phases are:

    * ``'Powerplay'`` -- overs 1 to 6
    * ``'Middle'``    -- overs 7 to 15
    * ``'Death'``     -- over 16 onwards
    """
    # `<= 6`, not `< 6`: with overs counted from 1 the sixth over is still
    # part of the powerplay.
    if over <= 6:
        return 'Powerplay'
    if over <= 15:
        return 'Middle'
    return 'Death'

# adding phase information to the dataframe
# `assign` returns a new DataFrame, so rebinding the name avoids writing
# into a slice of deliveries_df (the source of SettingWithCopyWarning).
match_deliveries_df = match_deliveries_df.assign(
    phase=match_deliveries_df['over'].apply(classify_phase)
)

# grouping data by phase and team to calculate runs, wickets and balls.
# Wides and no-balls are re-bowled, so only deliveries with no wide and no
# no-ball runs count towards 'balls'; counting every row would understate
# the run rate.
# NOTE(review): assumes wide_runs / noball_runs are 0 on legal deliveries.
phase_analysis = (
    match_deliveries_df
    .assign(
        legal_delivery=lambda d: d['wide_runs'].eq(0) & d['noball_runs'].eq(0)
    )
    .groupby(['batting_team', 'phase'])
    .agg(
        total_runs=('total_runs', 'sum'),
        wickets_taken=('wickets_taken', 'sum'),
        balls=('legal_delivery', 'sum'),
    )
    .reset_index()
)

# calculating the run rate (runs per six legal deliveries)
phase_analysis['run_rate'] = (phase_analysis['total_runs'] / phase_analysis['balls']) * 6

# plotting the phase analysis
fig, ax1 = plt.subplots(figsize=(12, 8))

# bar plot for runs scored in each phase
sns.barplot(data=phase_analysis, x='phase', y='total_runs', hue='batting_team', ax=ax1)
ax1.set_title('Phase Analysis: Runs and Wickets')
ax1.set_ylabel('Total Runs')
ax1.set_xlabel('Match Phase')

# line plot for wickets lost, on a second y-axis sharing the phase axis
ax2 = ax1.twinx()
sns.lineplot(
    data=phase_analysis,
    x='phase',
    y='wickets_taken',
    hue='batting_team',
    marker='o',
    ax=ax2,
    legend=False,
)
ax2.set_ylabel('Wickets Lost')

plt.show()

C:\Users\DELL\AppData\Local\Temp\ipykernel_25608\1756120926.py:11: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match_deliveries_df['phase'] = match_deliveries_df['over'].apply(classify_phase)

# calculate runs and balls faced for each batter
# A wide is not a ball faced by the batter, so rows with wide runs are left
# out of 'balls_faced'; counting every row deflates the strike rate.
# NOTE(review): assumes wide_runs is 0 on every delivery that is not a wide.
batter_stats = (
    match_deliveries_df
    .assign(ball_faced=match_deliveries_df['wide_runs'].eq(0))
    .groupby('batsman')
    .agg(
        batsman_runs=('batsman_runs', 'sum'),
        balls_faced=('ball_faced', 'sum'),
    )
    .reset_index()
)

# calculate strike rate for each batter (runs per 100 balls)
# A batter whose only deliveries were wides has 0 balls faced and gets a
# NaN strike rate, which sort_values places last.
batter_stats['strike_rate'] = (batter_stats['batsman_runs'] / batter_stats['balls_faced']) * 100

# sorting batters by their strike rate
batter_stats_sorted = batter_stats.sort_values(by='strike_rate', ascending=False)

# displaying calculated strike rates along with runs scored and balls faced
batter_stats_sorted.head(10)

# runs and balls faced for each batter within each match phase
# A wide is not a ball faced by the batter, so rows with wide runs are left
# out of 'balls_faced'.
# NOTE(review): assumes wide_runs is 0 on every delivery that is not a wide.
batter_phase_stats = (
    match_deliveries_df
    .assign(ball_faced=match_deliveries_df['wide_runs'].eq(0))
    .groupby(['batsman', 'phase'])
    .agg(
        batsman_runs=('batsman_runs', 'sum'),
        balls_faced=('ball_faced', 'sum'),
    )
    .reset_index()
)

# calculate strike rate for each batsman-phase combination (runs per 100 balls)
batter_phase_stats['strike_rate'] = (batter_phase_stats['batsman_runs'] / batter_phase_stats['balls_faced']) * 100

# filtering for the five batters with the best overall strike rate
top_performers = batter_stats_sorted.head(5)['batsman']
batter_phase_stats_top = batter_phase_stats[batter_phase_stats['batsman'].isin(top_performers)]

# plotting strike rate across different phases for top performers
plt.figure(figsize=(10, 6))
sns.barplot(data=batter_phase_stats_top, x='batsman', y='strike_rate', hue='phase')
plt.title('Strike Rate Across Different Phases for Top Performers')
plt.xlabel('Batsman')
plt.ylabel('Strike Rate')
plt.legend(title='Match Phase')
plt.show()

# calculate cumulative runs and wickets for each ball for both teams
# `assign` returns a new DataFrame, so rebinding the name avoids writing
# into a slice of deliveries_df (the source of SettingWithCopyWarning).
match_deliveries_df = match_deliveries_df.assign(
    cumulative_runs=match_deliveries_df.groupby('batting_team')['total_runs'].cumsum(),
    cumulative_wickets=match_deliveries_df.groupby('batting_team')['wickets_taken'].cumsum(),
)

# separate data for both teams (taken from the configured team1 / team2 so
# the cell follows the match selected at the top of the notebook)
team1_deliveries = match_deliveries_df[match_deliveries_df['batting_team'] == team1].copy()
team2_deliveries = match_deliveries_df[match_deliveries_df['batting_team'] == team2].copy()

# position of each delivery on the x-axis: over number plus the fraction
# of the over already bowled
team1_deliveries['over_ball'] = team1_deliveries['over'] + (team1_deliveries['ball'] - 1) / 6
team2_deliveries['over_ball'] = team2_deliveries['over'] + (team2_deliveries['ball'] - 1) / 6

# plotting cumulative runs, with an X marking every delivery where a wicket fell
fig, ax = plt.subplots(figsize=(14, 8))

innings_to_plot = (
    (team1_deliveries, team1, 'red'),
    (team2_deliveries, team2, 'darkred'),
)
for innings_deliveries, team_name, colour in innings_to_plot:
    ax.plot(
        innings_deliveries['over_ball'],
        innings_deliveries['cumulative_runs'],
        color=colour,
        label=f'{team_name} Runs',
    )
    wicket_deliveries = innings_deliveries[innings_deliveries['wickets_taken'] == 1]
    ax.scatter(
        wicket_deliveries['over_ball'],
        wicket_deliveries['cumulative_runs'],
        color=colour,
        marker='X',
        s=100,
        label=f'{team_name} Wickets',
    )

ax.set_title(f'Cumulative Runs with Wickets for {team1} and {team2}')
ax.set_xlabel('Over')
ax.set_ylabel('Cumulative Runs')
ax.legend()
plt.show()

C:\Users\DELL\AppData\Local\Temp\ipykernel_25608\310014327.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match_deliveries_df['cumulative_runs'] = match_deliveries_df.groupby('batting_team')['total_runs'].cumsum()
C:\Users\DELL\AppData\Local\Temp\ipykernel_25608\310014327.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  match_deliveries_df['cumulative_wickets'] = match_deliveries_df.groupby('batting_team')['wickets_taken'].cumsum()

# calculate runs and wickets per over for both teams
per_over_stats = (
    match_deliveries_df
    .groupby(['batting_team', 'over'])
    .agg({'total_runs': 'sum', 'wickets_taken': 'sum'})
    .reset_index()
)

# the run rate of a single over is simply the runs scored in it
per_over_stats['run_rate'] = per_over_stats['total_runs']

# separate data for the two configured teams (team1 / team2 instead of
# literal names, so the cell follows the match selected at the top)
team1_per_over_stats = per_over_stats[per_over_stats['batting_team'] == team1]
team2_per_over_stats = per_over_stats[per_over_stats['batting_team'] == team2]

# one panel per team: run rate line, with a dot on every over that had a wicket
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

panels = (
    (ax1, team1, team1_per_over_stats, 'red', 'gold'),
    (ax2, team2, team2_per_over_stats, 'darkred', 'blue'),
)
for axis, team_name, team_stats, line_colour, wicket_colour in panels:
    axis.plot(
        team_stats['over'],
        team_stats['run_rate'],
        marker='o',
        color=line_colour,
        label=f'{team_name} Run Rate',
    )
    wicket_overs = team_stats[team_stats['wickets_taken'] > 0]
    axis.scatter(
        wicket_overs['over'],
        wicket_overs['run_rate'],
        color=wicket_colour,
        s=100,
        label='Wickets',
    )
    axis.set_title(f'{team_name} Run Rate Per Over')
    axis.set_ylabel('Runs per Over')
    axis.legend()

# the x-axis is shared, so only the bottom panel carries its label
ax2.set_xlabel('Over')

plt.show()

	batting_team	batsman	batsman_runs
6	PBKS	Shashank Singh	61
11	RCB	Kohli	43
2	PBKS	Josh Inglis	39
4	PBKS	Prabhsimran	26
16	RCB	Rajat Patidar	26
13	RCB	Livingstone	25
5	PBKS	Priyansh Arya	24
10	RCB	Jitesh Sharma	24
14	RCB	Mayank Agarawal	24
17	RCB	Shepherd	17
15	RCB	Phil Salt	16
3	PBKS	Nehal Wadhera	15
8	PBKS	Stoinis	6
12	RCB	Krunal Pandya	4
0	PBKS	Azmatullah	1
9	RCB	Bhuvneshwar	1
7	PBKS	Shreyas Iyer	1
18	RCB	Yash Dayal	1
1	PBKS	Jamieson	0

	bowling_team	bowler	total_runs	wickets_taken	over
0	PBKS	Arshdeep Singh	40	3	4
1	PBKS	Azmatullah	35	1	4
2	PBKS	Chahal	37	1	4
3	PBKS	Jamieson	48	3	4
4	PBKS	Vijaykumar Vyshak	30	1	4
5	RCB	Bhuvneshwar	38	2	4
6	RCB	Hazlewood	55	1	4
7	RCB	Krunal Pandya	17	2	4
8	RCB	Shepherd	30	1	3
9	RCB	Suyash Sharma	20	0	2
10	RCB	Yash Dayal	24	1	3

	batsman	batsman_runs	balls_faced	strike_rate
17	Stoinis	6	2	300.000000
3	Jitesh Sharma	24	11	218.181818
14	Shashank Singh	61	30	203.333333
15	Shepherd	17	9	188.888889
4	Josh Inglis	39	23	169.565217
13	Rajat Patidar	26	16	162.500000
10	Phil Salt	16	10	160.000000
8	Mayank Agarawal	24	19	126.315789
7	Livingstone	25	20	125.000000
12	Priyansh Arya	24	20	120.000000

IPL Final Showdowns Analysis