Marketing analytics broadly refers to the practice of using analytical methods and techniques to measure the effectiveness of marketing activities and to make data-driven decisions that improve conversion rates and return on investment (ROI). It typically involves analyzing metrics around customer engagement with marketing activities, including but not limited to ATL (above-the-line) campaigns, BTL (below-the-line) campaigns, and targeted personalized offers. The variables of interest are usually the customer profile, campaign conversion rates, and the costs associated with the various marketing channels. Together, these can generate valuable insights that help an organization form better marketing strategies, optimize or innovate on delivery, and achieve overall growth.
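As a minimal illustration (the numbers below are made up purely for illustration and are not from the dataset), the two headline metrics can be computed as follows:
# Hypothetical campaign figures, purely for illustration
customers_contacted = 10000
customers_converted = 150
campaign_cost = 5000.0
revenue_from_conversions = 18000.0
conversion_rate = customers_converted / customers_contacted  # fraction of contacted customers who converted
roi = (revenue_from_conversions - campaign_cost) / campaign_cost  # return per unit of campaign spend
print(f"Conversion rate: {conversion_rate:.1%}, ROI: {roi:.1f}x")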
The company 'All You Need' has hired you as a Data Scientist. The Chief Marketing Officer has told you that recent marketing campaigns have not been as effective as expected and that the conversion rate is very low. Your task is to analyze the related data, understand the problem, and identify key insights and recommendations for the CMO to potentially implement.
The dataset marketing_data.csv contains records for 2,240 customers of All You Need, with data on customer demographics (birth year, education, marital status, income, household composition), purchase recency, amount spent on each product category over the last 2 years, number of purchases made through each channel, responses to six marketing campaigns, complaints, and country.
# Library to suppress warnings and deprecation notices
import warnings
warnings.filterwarnings('ignore')
# Libraries to help with reading and manipulating data
import numpy as np
import pandas as pd
# Libraries to help with data visualization
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# from google.colab import files
# uploaded = files.upload()
# loading the dataset
df = pd.read_csv('marketing_data.csv')
df.head()
|   | ID | Year_Birth | Education | Marital_Status | Income | Kidhome | Teenhome | Recency | MntWines | MntFruits | ... | NumStorePurchases | NumWebVisitsMonth | AcceptedCmp1 | AcceptedCmp2 | AcceptedCmp3 | AcceptedCmp4 | AcceptedCmp5 | AcceptedCmp6 | Complain | Country |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1826 | 1970 | Graduation | Divorced | 84835.0 | 0 | 0 | 0 | 189 | 104 | ... | 6 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | SP |
1 | 1 | 1961 | Graduation | Single | 57091.0 | 0 | 0 | 0 | 464 | 5 | ... | 7 | 5 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | CA |
2 | 10476 | 1958 | Graduation | Married | 67267.0 | 0 | 1 | 0 | 134 | 11 | ... | 5 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | US |
3 | 1386 | 1967 | Graduation | Together | 32474.0 | 1 | 1 | 0 | 10 | 0 | ... | 2 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | AUS |
4 | 5371 | 1989 | Graduation | Single | 21474.0 | 1 | 0 | 0 | 6 | 16 | ... | 2 | 7 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | SP |
5 rows × 27 columns
#Checking the info
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2240 entries, 0 to 2239
Data columns (total 27 columns):
 #   Column               Non-Null Count  Dtype
---  ------               --------------  -----
 0   ID                   2240 non-null   int64
 1   Year_Birth           2240 non-null   int64
 2   Education            2240 non-null   object
 3   Marital_Status       2240 non-null   object
 4   Income               2216 non-null   float64
 5   Kidhome              2240 non-null   int64
 6   Teenhome             2240 non-null   int64
 7   Recency              2240 non-null   int64
 8   MntWines             2240 non-null   int64
 9   MntFruits            2240 non-null   int64
 10  MntMeatProducts      2240 non-null   int64
 11  MntFishProducts      2240 non-null   int64
 12  MntSweetProducts     2240 non-null   int64
 13  MntGoldProds         2240 non-null   int64
 14  NumDealsPurchases    2240 non-null   int64
 15  NumWebPurchases      2240 non-null   int64
 16  NumCatalogPurchases  2240 non-null   int64
 17  NumStorePurchases    2240 non-null   int64
 18  NumWebVisitsMonth    2240 non-null   int64
 19  AcceptedCmp1         2240 non-null   int64
 20  AcceptedCmp2         2240 non-null   int64
 21  AcceptedCmp3         2240 non-null   int64
 22  AcceptedCmp4         2240 non-null   int64
 23  AcceptedCmp5         2240 non-null   int64
 24  AcceptedCmp6         2240 non-null   int64
 25  Complain             2240 non-null   int64
 26  Country              2240 non-null   object
dtypes: float64(1), int64(23), object(3)
memory usage: 472.6+ KB
Observations:
# % Null values in the Income column
(df.isnull().sum()/df.shape[0]*100)['Income']
1.0714285714285714
Observations:
# num_cols contains the numerical variables
num_cols=['Year_Birth','Income','Recency', 'MntWines', 'MntFruits',
'MntMeatProducts', 'MntFishProducts', 'MntSweetProducts',
'MntGoldProds', 'NumDealsPurchases', 'NumWebPurchases',
'NumCatalogPurchases', 'NumStorePurchases', 'NumWebVisitsMonth','Kidhome',
'Teenhome']
# printing descriptive statistics of numerical columns
df[num_cols].describe().T
|   | count | mean | std | min | 25% | 50% | 75% | max |
---|---|---|---|---|---|---|---|---|
Year_Birth | 2240.0 | 1968.805804 | 11.984069 | 1893.0 | 1959.00 | 1970.0 | 1977.00 | 1996.0 |
Income | 2216.0 | 52247.251354 | 25173.076661 | 1730.0 | 35303.00 | 51381.5 | 68522.00 | 666666.0 |
Recency | 2240.0 | 49.109375 | 28.962453 | 0.0 | 24.00 | 49.0 | 74.00 | 99.0 |
MntWines | 2240.0 | 303.935714 | 336.597393 | 0.0 | 23.75 | 173.5 | 504.25 | 1493.0 |
MntFruits | 2240.0 | 26.302232 | 39.773434 | 0.0 | 1.00 | 8.0 | 33.00 | 199.0 |
MntMeatProducts | 2240.0 | 166.950000 | 225.715373 | 0.0 | 16.00 | 67.0 | 232.00 | 1725.0 |
MntFishProducts | 2240.0 | 37.525446 | 54.628979 | 0.0 | 3.00 | 12.0 | 50.00 | 259.0 |
MntSweetProducts | 2240.0 | 27.062946 | 41.280498 | 0.0 | 1.00 | 8.0 | 33.00 | 263.0 |
MntGoldProds | 2240.0 | 44.021875 | 52.167439 | 0.0 | 9.00 | 24.0 | 56.00 | 362.0 |
NumDealsPurchases | 2240.0 | 2.325000 | 1.932238 | 0.0 | 1.00 | 2.0 | 3.00 | 15.0 |
NumWebPurchases | 2240.0 | 4.084821 | 2.778714 | 0.0 | 2.00 | 4.0 | 6.00 | 27.0 |
NumCatalogPurchases | 2240.0 | 2.662054 | 2.923101 | 0.0 | 0.00 | 2.0 | 4.00 | 28.0 |
NumStorePurchases | 2240.0 | 5.790179 | 3.250958 | 0.0 | 3.00 | 5.0 | 8.00 | 13.0 |
NumWebVisitsMonth | 2240.0 | 5.316518 | 2.426645 | 0.0 | 3.00 | 6.0 | 7.00 | 20.0 |
Kidhome | 2240.0 | 0.444196 | 0.538398 | 0.0 | 0.00 | 0.0 | 1.00 | 2.0 |
Teenhome | 2240.0 | 0.506250 | 0.544538 | 0.0 | 0.00 | 0.0 | 1.00 | 2.0 |
**Observations:** Income has missing values. The customer birth-year and income ranges are wide. The minimum birth year is 1893, i.e., before 1900, which indicates data-entry errors. On average, customers spent the most on wines over the last 2 years, followed by meat products. The average number of purchases made in store is higher than through the catalog or the web. In terms of the maximum number of purchases by a single customer, the catalog channel leads. The number of kids/teenagers in a customer's household ranges from 0 to 2.
# cat_cols contains the categorical variables
cat_cols=['Education', 'Marital_Status', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1',
'AcceptedCmp2', 'AcceptedCmp6', 'Complain', 'Country']
# Printing the count of each unique value in each column
for column in cat_cols:
print(df[column].value_counts(normalize=True))
print("-" * 40)
Graduation    0.503125
PhD           0.216964
Master        0.165179
2n Cycle      0.090625
Basic         0.024107
Name: Education, dtype: float64
----------------------------------------
Married     0.385714
Together    0.258929
Single      0.214286
Divorced    0.103571
Widow       0.034375
Alone       0.001339
Absurd      0.000893
YOLO        0.000893
Name: Marital_Status, dtype: float64
----------------------------------------
0    0.927232
1    0.072768
Name: AcceptedCmp3, dtype: float64
----------------------------------------
0    0.935714
1    0.064286
Name: AcceptedCmp4, dtype: float64
----------------------------------------
0    0.925446
1    0.074554
Name: AcceptedCmp5, dtype: float64
----------------------------------------
0    0.927232
1    0.072768
Name: AcceptedCmp1, dtype: float64
----------------------------------------
0    0.850893
1    0.149107
Name: AcceptedCmp2, dtype: float64
----------------------------------------
0    0.986607
1    0.013393
Name: AcceptedCmp6, dtype: float64
----------------------------------------
0    0.990625
1    0.009375
Name: Complain, dtype: float64
----------------------------------------
SP     0.488839
SA     0.150446
CA     0.119643
AUS    0.071429
IND    0.066071
GER    0.053571
US     0.048661
ME     0.001339
Name: Country, dtype: float64
----------------------------------------
Observations:
In this section, we will first prepare our dataset for analysis.
# Replacing 2n Cycle with Master
df["Education"].replace("2n Cycle", "Master", inplace=True)
# Replacing Alone with Single, and Absurd and YOLO with Others
df["Marital_Status"].replace("Alone", "Single", inplace=True)
df['Marital_Status'].replace(["Absurd", "YOLO"], "Others", inplace=True)
We have fixed the categories in Education and Marital_Status. Now, let's look at the count of customers in each marital status category.
df.Marital_Status.value_counts()
Married     864
Together    580
Single      483
Divorced    232
Widow        77
Others        4
Name: Marital_Status, dtype: int64
Observation:
# Creating new features to get an overall picture of a customer: how much they have spent,
# how many children they have, total campaigns accepted, etc.
# total spending by a customer
spending_col = [col for col in df.columns if 'Mnt' in col]
df['Total_Spending'] = df[spending_col].sum(axis = 1)
#total purchases made by a customer
platform_col = [col for col in df.columns if 'Purchases' in col]
df['Total_Purchase'] = df[platform_col].sum(axis = 1)
#total no. of children
df['NumberofChildren'] = df['Kidhome'] + df['Teenhome']
# Total no. of campaigns accepted by a customer
campaigns_cols = [col for col in df.columns if 'Cmp' in col]
df['TotalCampaignsAcc'] = df[campaigns_cols].sum(axis=1)
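As a quick, optional sanity check on the engineered features, we can preview the new columns:
# Previewing the derived columns for the first few customers
df[['Total_Spending', 'Total_Purchase', 'NumberofChildren', 'TotalCampaignsAcc']].head()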
# Plotting boxplot for Year_Birth, Total_Spending, Total_Purchase
cols=['Year_Birth','Total_Spending','Total_Purchase']
for i in cols:
sns.boxplot(x=df[i])
plt.show()
Observations:
Let's check the number of observations for which year birth is less than 1900.
df[df['Year_Birth'] < 1900]
|   | ID | Year_Birth | Education | Marital_Status | Income | Kidhome | Teenhome | Recency | MntWines | MntFruits | ... | AcceptedCmp3 | AcceptedCmp4 | AcceptedCmp5 | AcceptedCmp6 | Complain | Country | Total_Spending | Total_Purchase | NumberofChildren | TotalCampaignsAcc |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
513 | 11004 | 1893 | Master | Single | 60182.0 | 0 | 1 | 23 | 8 | 0 | ... | 0 | 0 | 0 | 0 | 0 | SA | 22 | 4 | 1 | 0 |
827 | 1150 | 1899 | PhD | Together | 83532.0 | 0 | 0 | 36 | 755 | 144 | ... | 1 | 0 | 0 | 0 | 0 | SP | 1853 | 15 | 0 | 1 |
2 rows × 31 columns
Observation:
#keeping data for customers having birth year >1900
df = df[df['Year_Birth'] > 1900]
df
|   | ID | Year_Birth | Education | Marital_Status | Income | Kidhome | Teenhome | Recency | MntWines | MntFruits | ... | AcceptedCmp3 | AcceptedCmp4 | AcceptedCmp5 | AcceptedCmp6 | Complain | Country | Total_Spending | Total_Purchase | NumberofChildren | TotalCampaignsAcc |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1826 | 1970 | Graduation | Divorced | 84835.0 | 0 | 0 | 0 | 189 | 104 | ... | 0 | 0 | 0 | 0 | 0 | SP | 1190 | 15 | 0 | 1 |
1 | 1 | 1961 | Graduation | Single | 57091.0 | 0 | 0 | 0 | 464 | 5 | ... | 0 | 0 | 0 | 1 | 0 | CA | 577 | 18 | 0 | 2 |
2 | 10476 | 1958 | Graduation | Married | 67267.0 | 0 | 1 | 0 | 134 | 11 | ... | 0 | 0 | 0 | 0 | 0 | US | 251 | 11 | 1 | 0 |
3 | 1386 | 1967 | Graduation | Together | 32474.0 | 1 | 1 | 0 | 10 | 0 | ... | 0 | 0 | 0 | 0 | 0 | AUS | 11 | 4 | 2 | 0 |
4 | 5371 | 1989 | Graduation | Single | 21474.0 | 1 | 0 | 0 | 6 | 16 | ... | 0 | 0 | 0 | 0 | 0 | SP | 91 | 8 | 1 | 2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2235 | 10142 | 1976 | PhD | Divorced | 66476.0 | 0 | 1 | 99 | 372 | 18 | ... | 0 | 0 | 0 | 0 | 0 | US | 689 | 20 | 1 | 0 |
2236 | 5263 | 1977 | 2n Cycle | Married | 31056.0 | 1 | 0 | 99 | 5 | 10 | ... | 0 | 0 | 0 | 0 | 0 | SP | 55 | 5 | 1 | 0 |
2237 | 22 | 1976 | Graduation | Divorced | 46310.0 | 1 | 0 | 99 | 185 | 2 | ... | 0 | 0 | 0 | 0 | 0 | SP | 309 | 14 | 1 | 0 |
2238 | 528 | 1978 | Graduation | Married | 65819.0 | 0 | 0 | 99 | 267 | 38 | ... | 0 | 0 | 0 | 0 | 0 | IND | 1383 | 20 | 0 | 0 |
2239 | 4070 | 1969 | PhD | Married | 94871.0 | 0 | 2 | 99 | 169 | 24 | ... | 1 | 0 | 1 | 0 | 0 | CA | 1078 | 18 | 2 | 3 |
2237 rows × 31 columns
#plotting Boxplot for income
plt.figure(figsize=(10,4))
sns.boxplot(df['Income'])
plt.title('Income boxplot', size=16)
plt.show()
Observations:
#Calculating the upper whisker for the Income variable
Q1 = df['Income'].quantile(q=0.25)  # First quartile
Q3 = df['Income'].quantile(q=0.75)  # Third quartile
IQR = Q3 - Q1                       # Interquartile range
upper_whisker = Q3 + 1.5*IQR        # Upper whisker
print(upper_whisker)
118348.5
#Checking the observations marked as outliers
df[df.Income>upper_whisker]
|   | ID | Year_Birth | Education | Marital_Status | Income | Kidhome | Teenhome | Recency | MntWines | MntFruits | ... | AcceptedCmp3 | AcceptedCmp4 | AcceptedCmp5 | AcceptedCmp6 | Complain | Country | Total_Spending | Total_Purchase | NumberofChildren | TotalCampaignsAcc |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
325 | 4931 | 1977 | Graduation | Together | 157146.0 | 0 | 0 | 13 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | SA | 1730 | 28 | 0 | 0 |
497 | 1501 | 1982 | PhD | Married | 160803.0 | 0 | 0 | 21 | 55 | 16 | ... | 0 | 0 | 0 | 0 | 0 | US | 1717 | 44 | 0 | 0 |
527 | 9432 | 1977 | Graduation | Together | 666666.0 | 1 | 0 | 23 | 9 | 14 | ... | 0 | 0 | 0 | 0 | 0 | SA | 62 | 11 | 1 | 0 |
731 | 1503 | 1976 | PhD | Together | 162397.0 | 1 | 1 | 31 | 85 | 1 | ... | 0 | 0 | 0 | 0 | 0 | SP | 107 | 1 | 2 | 0 |
853 | 5336 | 1971 | Master | Together | 157733.0 | 1 | 0 | 37 | 39 | 1 | ... | 0 | 0 | 0 | 0 | 0 | SP | 59 | 2 | 1 | 0 |
1826 | 5555 | 1975 | Graduation | Divorced | 153924.0 | 0 | 0 | 81 | 1 | 1 | ... | 0 | 0 | 0 | 0 | 0 | SP | 6 | 0 | 0 | 0 |
1925 | 11181 | 1949 | PhD | Married | 156924.0 | 0 | 0 | 85 | 2 | 1 | ... | 0 | 0 | 0 | 0 | 0 | CA | 8 | 0 | 0 | 0 |
2204 | 8475 | 1973 | PhD | Married | 157243.0 | 0 | 1 | 98 | 20 | 2 | ... | 0 | 0 | 0 | 0 | 0 | IND | 1608 | 37 | 1 | 0 |
8 rows × 31 columns
Observations:
Let's compare the summary statistics for these observations with observations on the other side of the upper whisker.
#Checking the summary statistics for observations marked as outliers
df[df.Income>upper_whisker].describe().T
|   | count | mean | std | min | 25% | 50% | 75% | max |
---|---|---|---|---|---|---|---|---|
ID | 8.0 | 5989.250 | 3525.251308 | 1501.0 | 4074.00 | 5445.5 | 8714.25 | 11181.0 |
Year_Birth | 8.0 | 1972.500 | 10.028531 | 1949.0 | 1972.50 | 1975.5 | 1977.00 | 1982.0 |
Income | 8.0 | 221604.500 | 179850.404431 | 153924.0 | 157090.50 | 157488.0 | 161201.50 | 666666.0 |
Kidhome | 8.0 | 0.375 | 0.517549 | 0.0 | 0.00 | 0.0 | 1.00 | 1.0 |
Teenhome | 8.0 | 0.250 | 0.462910 | 0.0 | 0.00 | 0.0 | 0.25 | 1.0 |
Recency | 8.0 | 48.625 | 33.687376 | 13.0 | 22.50 | 34.0 | 82.00 | 98.0 |
MntWines | 8.0 | 26.500 | 30.798887 | 1.0 | 1.75 | 14.5 | 43.00 | 85.0 |
MntFruits | 8.0 | 4.500 | 6.524678 | 0.0 | 1.00 | 1.0 | 5.00 | 16.0 |
MntMeatProducts | 8.0 | 621.875 | 846.511402 | 1.0 | 7.25 | 17.0 | 1592.00 | 1725.0 |
MntFishProducts | 8.0 | 4.250 | 5.650537 | 1.0 | 1.00 | 2.0 | 3.50 | 17.0 |
MntSweetProducts | 8.0 | 1.250 | 0.886405 | 0.0 | 1.00 | 1.0 | 1.25 | 3.0 |
MntGoldProds | 8.0 | 3.750 | 4.131759 | 1.0 | 1.00 | 1.5 | 5.00 | 12.0 |
NumDealsPurchases | 8.0 | 4.250 | 6.777062 | 0.0 | 0.00 | 0.0 | 6.75 | 15.0 |
NumWebPurchases | 8.0 | 0.500 | 1.069045 | 0.0 | 0.00 | 0.0 | 0.25 | 3.0 |
NumCatalogPurchases | 8.0 | 9.875 | 13.484780 | 0.0 | 0.00 | 0.5 | 23.50 | 28.0 |
NumStorePurchases | 8.0 | 0.750 | 1.035098 | 0.0 | 0.00 | 0.5 | 1.00 | 3.0 |
NumWebVisitsMonth | 8.0 | 1.125 | 2.031010 | 0.0 | 0.00 | 0.5 | 1.00 | 6.0 |
AcceptedCmp1 | 8.0 | 0.000 | 0.000000 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 |
AcceptedCmp2 | 8.0 | 0.000 | 0.000000 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 |
AcceptedCmp3 | 8.0 | 0.000 | 0.000000 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 |
AcceptedCmp4 | 8.0 | 0.000 | 0.000000 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 |
AcceptedCmp5 | 8.0 | 0.000 | 0.000000 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 |
AcceptedCmp6 | 8.0 | 0.000 | 0.000000 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 |
Complain | 8.0 | 0.000 | 0.000000 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 |
Total_Spending | 8.0 | 662.125 | 848.380884 | 6.0 | 46.25 | 84.5 | 1635.25 | 1730.0 |
Total_Purchase | 8.0 | 15.375 | 18.220377 | 0.0 | 0.75 | 6.5 | 30.25 | 44.0 |
NumberofChildren | 8.0 | 0.625 | 0.744024 | 0.0 | 0.00 | 0.5 | 1.00 | 2.0 |
TotalCampaignsAcc | 8.0 | 0.000 | 0.000000 | 0.0 | 0.00 | 0.0 | 0.00 | 0.0 |
#Checking the summary statistics for observations not marked as outliers
df[df.Income<upper_whisker].describe().T
|   | count | mean | std | min | 25% | 50% | 75% | max |
---|---|---|---|---|---|---|---|---|
ID | 2205.0 | 5585.439456 | 3247.546423 | 0.0 | 2815.0 | 5455.0 | 8418.0 | 11191.0 |
Year_Birth | 2205.0 | 1968.904308 | 11.705801 | 1940.0 | 1959.0 | 1970.0 | 1977.0 | 1996.0 |
Income | 2205.0 | 51622.094785 | 20713.063826 | 1730.0 | 35196.0 | 51287.0 | 68281.0 | 113734.0 |
Kidhome | 2205.0 | 0.442177 | 0.537132 | 0.0 | 0.0 | 0.0 | 1.0 | 2.0 |
Teenhome | 2205.0 | 0.506576 | 0.544380 | 0.0 | 0.0 | 0.0 | 1.0 | 2.0 |
Recency | 2205.0 | 49.009070 | 28.932111 | 0.0 | 24.0 | 49.0 | 74.0 | 99.0 |
MntWines | 2205.0 | 306.164626 | 337.493839 | 0.0 | 24.0 | 178.0 | 507.0 | 1493.0 |
MntFruits | 2205.0 | 26.403175 | 39.784484 | 0.0 | 2.0 | 8.0 | 33.0 | 199.0 |
MntMeatProducts | 2205.0 | 165.312018 | 217.784507 | 0.0 | 16.0 | 68.0 | 232.0 | 1725.0 |
MntFishProducts | 2205.0 | 37.756463 | 54.824635 | 0.0 | 3.0 | 12.0 | 50.0 | 259.0 |
MntSweetProducts | 2205.0 | 27.128345 | 41.130468 | 0.0 | 1.0 | 8.0 | 34.0 | 262.0 |
MntGoldProds | 2205.0 | 44.057143 | 51.736211 | 0.0 | 9.0 | 25.0 | 56.0 | 321.0 |
NumDealsPurchases | 2205.0 | 2.318367 | 1.886107 | 0.0 | 1.0 | 2.0 | 3.0 | 15.0 |
NumWebPurchases | 2205.0 | 4.100680 | 2.737424 | 0.0 | 2.0 | 4.0 | 6.0 | 27.0 |
NumCatalogPurchases | 2205.0 | 2.645351 | 2.798647 | 0.0 | 0.0 | 2.0 | 4.0 | 28.0 |
NumStorePurchases | 2205.0 | 5.823583 | 3.241796 | 0.0 | 3.0 | 5.0 | 8.0 | 13.0 |
NumWebVisitsMonth | 2205.0 | 5.336961 | 2.413535 | 0.0 | 3.0 | 6.0 | 7.0 | 20.0 |
AcceptedCmp1 | 2205.0 | 0.073923 | 0.261705 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
AcceptedCmp2 | 2205.0 | 0.151020 | 0.358150 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
AcceptedCmp3 | 2205.0 | 0.073016 | 0.260222 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
AcceptedCmp4 | 2205.0 | 0.064399 | 0.245518 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
AcceptedCmp5 | 2205.0 | 0.074376 | 0.262442 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
AcceptedCmp6 | 2205.0 | 0.013605 | 0.115872 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
Complain | 2205.0 | 0.009070 | 0.094827 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
Total_Spending | 2205.0 | 606.821769 | 601.675284 | 5.0 | 69.0 | 397.0 | 1047.0 | 2525.0 |
Total_Purchase | 2205.0 | 14.887982 | 7.615277 | 0.0 | 8.0 | 15.0 | 21.0 | 43.0 |
NumberofChildren | 2205.0 | 0.948753 | 0.749231 | 0.0 | 0.0 | 1.0 | 1.0 | 3.0 |
TotalCampaignsAcc | 2205.0 | 0.450340 | 0.894075 | 0.0 | 0.0 | 0.0 | 1.0 | 5.0 |
Observations:
#Dropping 5 observations at indices 527, 731, 853, 1826, 1925
df.drop(index=[527, 731, 853, 1826, 1925], inplace=True)
#plotting displot for income
sns.displot(df['Income'], kde=True, height=5, aspect=2)
plt.title('Income distribution', size=16, )
plt.ylabel('count');
Observations:
#filling null values with median
df['Income'].fillna(df.Income.median(), inplace=True)
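As an optional check, we can confirm that the imputation left no missing values in Income:
# Should be 0 after the median imputation
df['Income'].isnull().sum()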
# Plotting the % acceptance for every campaign
Camp_cols=['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp6']
success_campaign=(df[Camp_cols].sum()/df.shape[0])*100
# plot
success_campaign.plot(kind='bar', figsize=(6,6))
plt.ylabel("Perentage")
plt.show()
Observations: The 2nd campaign had the highest acceptance rate. The acceptance rates of the 1st, 3rd, and 5th campaigns do not differ significantly from one another. Less than 2% of customers accepted the offer in the 6th campaign.
plt.figure(figsize=(8,8))
sns.swarmplot(x='TotalCampaignsAcc', y='Income', data=df)
plt.show()
Observations:
# Let's see the mean income of customers
df.Income.mean()
51762.59811827957
The mean income of customers is close to 52K. Let's divide the customers into two segments, income > 52K and income < 52K, and compare the campaign acceptance rate in each segment.
# making dataframes of customers having income <52k and >52K
df1=df[df.Income<52000]
df2=df[df.Income>52000]
Camp_cols=['AcceptedCmp1', 'AcceptedCmp2', 'AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp6']
#Calculating success rate of each campaign for both segments
success_campaign1=pd.DataFrame((df1[Camp_cols].sum()/df1.shape[0])*100, columns=['Income <52K'])
success_campaign2=pd.DataFrame((df2[Camp_cols].sum()/df2.shape[0])*100, columns=['Income >52K'])
new_df=pd.concat([success_campaign1, success_campaign2], axis=1)
# plot
plt.figure(figsize=(8,8))
sns.lineplot(data=new_df)
plt.title("Percentage Acceptance of each campaign")
plt.ylabel("Percentage Acceptance of a campaign")
plt.show()
Observations: The income < 52K and income > 52K segments follow the same overall trend in campaign acceptance. Both segments have their highest acceptance rate in the 2nd campaign, and the > 52K segment has a much higher acceptance rate than the < 52K segment for every campaign. For the < 52K segment, the acceptance rate drops sharply from the 3rd campaign onward, and very few of these customers accepted the 6th campaign. For the > 52K segment, the acceptance rate drops sharply only at the 6th campaign. The 5th campaign performed better than the 3rd and 4th campaigns in both income segments.
Let's find out who accepted the last campaign and what the reason could be.
df[df['AcceptedCmp6']==1].shape
(30, 31)
grouped2 = df.groupby('AcceptedCmp6')['TotalCampaignsAcc'].mean()
grouped2
AcceptedCmp6
0    0.403715
1    3.633333
Name: TotalCampaignsAcc, dtype: float64
Observations:
Let's define a function which takes the column name for a product as input and generates a plot of the average amount spent on that product by customers who did and did not accept each campaign.
def amount_per_campaign(column_name):
    # For each campaign, average amount spent on the product by customers who
    # did not accept (0) and who did accept (1) that campaign
    parts = [pd.DataFrame(df.groupby(camp)[column_name].mean()).T for camp in Camp_cols]
    pd.concat(parts, axis=0).set_index([Camp_cols]).plot(kind='line', figsize=(8,8))
    plt.ylabel('Average amount spent on ' + column_name)
    plt.show()
#here is an example showing how to use this function on the column MntWines
amount_per_campaign('MntWines')
Observations:
# meat products
amount_per_campaign('MntMeatProducts')
# fruit products
amount_per_campaign('MntFruits')
# gold products
amount_per_campaign('MntGoldProds')
# sweet products
amount_per_campaign('MntSweetProducts')
Observations: For customers who accepted campaigns 3 and 4, the average amount spent on meat is quite high, and the same holds for fruits and sweet products. For customers who accepted campaigns 1, 3, 4, and 6, the average amount spent on gold products is quite high. This could mean that different campaigns are focused on different sets of products.
We have defined a function which takes the column name of a purchase channel as input and generates a plot of the average number of purchases made through that channel by customers who did and did not accept each campaign.
def Purchases_per_campaign(column_name):
    # For each campaign, average value of the given purchase/behaviour column for
    # customers who did not accept (0) and who did accept (1) that campaign
    parts = [pd.DataFrame(df.groupby(camp)[column_name].mean()).T for camp in Camp_cols]
    pd.concat(parts, axis=0).set_index([Camp_cols]).plot(kind='line', figsize=(8,8))
    plt.ylabel('Average ' + column_name)
    plt.show()
#here is an example showing how to use this function on the column NumDealsPurchases
Purchases_per_campaign('NumDealsPurchases')
Observations:
# store purchases
Purchases_per_campaign('NumStorePurchases')
# catalog purchases
Purchases_per_campaign('NumCatalogPurchases')
# web purchases
Purchases_per_campaign('NumWebPurchases')
Observations: For customers who accepted campaigns 1 and 2, the average number of store purchases is relatively low. For customers who accepted campaigns 1, 2, and 5, the average number of deals purchases is relatively low. For customers who accepted campaigns 1 and 6, the average number of web purchases is relatively low.
#Recency
Purchases_per_campaign('Recency')
Observations:
We will check the percentage acceptance of each campaign with respect to each category of a categorical variable. The percentage acceptance is calculated as the ratio of the number of customers in a category who accepted the campaign to the total number of customers in that category.
def Cat_Campaign_Relation(df, column_name):
e1=(df.groupby([column_name]).sum()['AcceptedCmp1']/df.groupby([column_name]).count()['AcceptedCmp1'])
e2=(df.groupby([column_name]).sum()['AcceptedCmp2']/df.groupby([column_name]).count()['AcceptedCmp2'])
e3=(df.groupby([column_name]).sum()['AcceptedCmp3']/df.groupby([column_name]).count()['AcceptedCmp3'])
e4=(df.groupby([column_name]).sum()['AcceptedCmp4']/df.groupby([column_name]).count()['AcceptedCmp4'])
e5=(df.groupby([column_name]).sum()['AcceptedCmp5']/df.groupby([column_name]).count()['AcceptedCmp5'])
e6=(df.groupby([column_name]).sum()['AcceptedCmp6']/df.groupby([column_name]).count()['AcceptedCmp6'])
df_new=pd.concat([e1,e2,e3,e4,e5,e6],axis=1).T
plt.figure(figsize=(8,8))
sns.lineplot(data=df_new, markers=True, linewidth=2)
plt.ylabel('Percentage Acceptance')
plt.show()
#here is an example showing how to use this function on the column Education
Cat_Campaign_Relation(df, 'Education')
Observations:
# NumberofChildren
Cat_Campaign_Relation(df, 'NumberofChildren')
#Let's filter out the observations in the 'Others' category as there are only 4 such observations
df_rest=df[df.Marital_Status!='Others']
# Calling Cat_Campaign_Relation for Marital_Status with the filtered dataframe df_rest
Cat_Campaign_Relation(df_rest, 'Marital_Status')
#Let's filter out the observations for the 'ME' country as there are only 3 such observations
df_not_mexico=df[df.Country!='ME']
#Plot
plt.figure(figsize=(8,8))
sns.heatmap((df_not_mexico.groupby('Country').sum()[Camp_cols]/df_not_mexico.groupby('Country').count()[Camp_cols])*100, annot=True, fmt='0.2f', cmap="YlGnBu")
**Observations:** US, SP, SA, IND, GER, CA, and AUS all have their highest acceptance rate in the 2nd campaign and their lowest in the 6th campaign. For the 2nd campaign, SP has the highest acceptance rate while IND has the lowest. The acceptance rates of the 1st campaign do not differ significantly across these countries.
# creating a list which contains the names of all the product-spending columns
mnt_cols = [col for col in df.columns if 'Mnt' in col]
spending=df[mnt_cols].mean(axis=0)
spending.plot(kind='bar', figsize=(10,5))
plt.ylabel("Average spend by customers")
plt.show()
Observations:
Let's check if product preferences are similar for different types of customers. For each category of customers, we will calculate the percentage of their total spending that goes to each product.
def amount_per_category(df, column_name):
df_new1=((df.groupby([column_name]).sum()[mnt_cols].T)/df.groupby([column_name]).sum()['Total_Spending'])
plt.figure(figsize=(10,8))
sns.heatmap(df_new1.T, annot=True, cmap="YlGnBu")
plt.show()
# plot showing the percentage of total spending of different products by a group of customers having the same education level
amount_per_category(df, 'Education')
Observations:
# calling the function amount_per_category for Marital_Status
amount_per_category(df, 'Marital_Status')
# calling the function amount_per_category for Country
amount_per_category(df, 'Country')
Observations: Customers spent roughly 50% of their total spending on wines. Divorced customers spent the highest share of their total spending on wines, followed by married customers. Single customers spent relatively more on meat products. Customers whose marital status is 'Others' spent relatively more on gold products. Customers from IND spent relatively more on meat products, while customers from ME spent less than 0.3% of their total spending on fruits.
Let's calculate the percentage of purchases for all the channels.
# list of cols for channels
channel_cols = [col for col in df.columns if 'Purchases' in col]
#making a dataframe with each channel's share of the total number of purchases
channels = pd.DataFrame(df[channel_cols].sum()/df.Total_Purchase.sum(), columns=['NumberofPurchases'])
# plot
channels.plot(kind='bar', figsize=(6,6))
plt.ylabel("Percentage Purchases")
plt.show()
Observations:
Let's check how the number of purchases made via different channels varies across income bins.
#Binning the income column
df['income_cat']=pd.qcut(df.Income, q=[0, 0.25, 0.50, 0.75, 1], labels=['low', 'medium', 'high', 'very_high'])
group=df.groupby('income_cat').sum()[channel_cols]
(group/group.sum()).plot(kind='bar', figsize=(8,8))
plt.ylabel("Percentage Purchases")
plt.show()
**Observations:** Low- and medium-income customers make the fewest purchases via the catalog; most of their purchases come from deals, followed by the web. For high-income customers, the channels do not show a significant difference in purchase share. Very-high-income customers make the most purchases via the catalog, followed by the store, and the fewest via deals. Customers in different income segments clearly prefer different purchase channels.
# Correlation between income and the number of purchases through each channel
corr=df[['Income', 'NumDealsPurchases', 'NumWebPurchases', 'NumCatalogPurchases', 'NumStorePurchases']].corr()
dataplot = sns.heatmap(corr, cmap="YlGnBu", annot=True)
plt.show()
**Observations:** Customers who make deals purchases are more likely to also purchase from the web, but less likely to make catalog or store purchases. Customers who make web purchases also tend to make catalog and store purchases. Customers who purchase via the catalog or the store are unlikely to make deals purchases.
From the analysis so far, we know that income, number of children, and the amount spent on wines are important factors. Let's define a new customer profile based on these 3 attributes and check what the acceptance rate would be for that profile.
df3=df[df.Income>52000]
df4=df3[df3.MntWines>df3.MntWines.mean()]
new_profile=df4[df4.NumberofChildren==0]
#Calculating the acceptance rate of each campaign for the overall data and for the new customer profile
success_campaign3=pd.DataFrame(success_campaign, columns=['Overall Acceptance'])
success_campaign4=pd.DataFrame((new_profile[Camp_cols].sum()/new_profile.shape[0])*100, columns=['New Customer Profile Acceptance'])
# plot
pd.concat([success_campaign3, success_campaign4], axis=1).plot(kind='bar', figsize=(8,6))
plt.title("")
plt.ylabel("")
plt.show()
Observations:
1. Different campaigns have different acceptance rates; most customers accepted the offer in the 2nd campaign.
2. The higher the income, the higher the acceptance rate.
3. Customers' product preferences and their acceptance rates are related.
4. Customers at different income levels prefer different purchase channels, which in turn influences the acceptance rate.
5. Customers spend very different amounts across the product categories.
Recommendations:
1. Use customer profiles to better target potential customers, and let the data drive the creative.
2. Provide personalized marketing campaigns to different kinds of customers.
3. Send the 5th and 6th campaigns to loyal customers instead of to everyone.