from IPython.display import HTML

# Inject a notebook-wide stylesheet by rendering a <style> element as the
# cell's HTML output. The rules below:
#   - hide the element matching `.jp-CodeCell:first-child` (intended to hide
#     this styling cell itself),
#   - set the body font, line height and background,
#   - colour h1/h2 headings and paragraphs,
#   - restyle code-cell and output-area <pre> blocks,
#   - restyle pandas tables (class `dataframe`), `.note` boxes and lists.
# NOTE(review): the `jp-*` selectors look like JupyterLab class names --
# confirm they match the frontend/export format this notebook targets.
# The call must remain the last expression of its cell so the notebook
# displays the returned HTML object.
HTML("""
<style>

/* ocultar la primera celda */
.jp-CodeCell:first-child {
    display:none;
}

/* tipografía general */
body {
    font-family: 'Segoe UI', Arial, sans-serif;
    line-height: 1.7;
    background:#fafafc;
}

/* títulos */
h1 {
    color:#6B8EEC;
    border-bottom:3px solid #BFD3FF;
    padding-bottom:6px;
}

h2 {
    color:#5A5A7A;
    margin-top:30px;
}

/* párrafos */
p {
    font-size:16px;
    color:#444;
}

/* bloques de código */
.jp-CodeCell pre {
    background:#f4f6ff !important;
    color:#333 !important;
    border-left:4px solid #9FB8FF;
    padding:12px;
    border-radius:6px;
}

/* resultados */
.jp-OutputArea pre {
    background:#fdf6ff;
    border-left:4px solid #D7B8FF;
    padding:10px;
    border-radius:6px;
}

/* tablas de pandas */
.dataframe {
    border-collapse: collapse !important;
    margin-top:15px;
    background:white;
    border-radius:8px;
    overflow:hidden;
}

.dataframe th {
    background:#C9D8FF !important;
    color:#333 !important;
    padding:8px;
}

.dataframe td {
    padding:8px;
    border-bottom:1px solid #eee;
}

/* cajas informativas */
.note {
    background:#EEF3FF;
    padding:15px;
    border-radius:8px;
    border-left:6px solid #9FB8FF;
}

/* listas */
ul {
    line-height:1.8;
}

</style>
""")

import pandas as pd

# Load the dataset from "titanic.csv" (path is relative to the working
# directory) into a DataFrame.
df = pd.read_csv("titanic.csv")

# Preview the first rows (head() returns 5 rows by default).
df.head()

# Preview the last rows (tail() returns 5 rows by default).
df.tail()

# Print the index range, column names, non-null counts, dtypes and memory
# usage. info() prints its report directly rather than returning it.
df.info()

<class 'pandas.DataFrame'>
RangeIndex: 1309 entries, 0 to 1308
Data columns (total 28 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Passengerid  1309 non-null   int64  
 1   Age          1309 non-null   float64
 2   Fare         1309 non-null   float64
 3   Sex          1309 non-null   int64  
 4   sibsp        1309 non-null   int64  
 5   zero         1309 non-null   int64  
 6   zero.1       1309 non-null   int64  
 7   zero.2       1309 non-null   int64  
 8   zero.3       1309 non-null   int64  
 9   zero.4       1309 non-null   int64  
 10  zero.5       1309 non-null   int64  
 11  zero.6       1309 non-null   int64  
 12  Parch        1309 non-null   int64  
 13  zero.7       1309 non-null   int64  
 14  zero.8       1309 non-null   int64  
 15  zero.9       1309 non-null   int64  
 16  zero.10      1309 non-null   int64  
 17  zero.11      1309 non-null   int64  
 18  zero.12      1309 non-null   int64  
 19  zero.13      1309 non-null   int64  
 20  zero.14      1309 non-null   int64  
 21  Pclass       1309 non-null   int64  
 22  zero.15      1309 non-null   int64  
 23  zero.16      1309 non-null   int64  
 24  Embarked     1307 non-null   float64
 25  zero.17      1309 non-null   int64  
 26  zero.18      1309 non-null   int64  
 27  2urvived     1309 non-null   int64  
dtypes: float64(3), int64(25)
memory usage: 286.5 KB

# Summary statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

# Count missing values in Age and Fare, the two columns modified later in
# this notebook.
df[["Age", "Fare"]].isnull().sum()

Age     0
Fare    0
dtype: int64

# Replace missing ages with the mean age. The null check recorded in this
# notebook reported 0 missing values for Age, so for this data the call
# leaves the column unchanged; it only matters if the CSV gains gaps.
df["Age"] = df["Age"].fillna(df["Age"].mean())
# Round fares to two decimal places (overwrites the original precision).
df["Fare"] = df["Fare"].round(2)

# Display the Age column after the fill step.
df["Age"]

0       22.0
1       38.0
2       26.0
3       35.0
4       35.0
        ... 
1304    28.0
1305    39.0
1306    38.5
1307    28.0
1308    28.0
Name: Age, Length: 1309, dtype: float64

# Display the Fare column after rounding to two decimals.
df["Fare"]

0         7.25
1        71.28
2         7.92
3        53.10
4         8.05
         ...  
1304      8.05
1305    108.90
1306      7.25
1307      8.05
1308     22.36
Name: Fare, Length: 1309, dtype: float64

	Passengerid	Age	Fare	Sex	sibsp	zero	zero.1	zero.2	zero.3	zero.4	...	zero.12	zero.13	zero.14	Pclass	zero.15	zero.16	Embarked	zero.17	zero.18	2urvived
count	1309.000000	1309.000000	1309.000000	1309.000000	1309.000000	1309.0	1309.0	1309.0	1309.0	1309.0	...	1309.0	1309.0	1309.0	1309.000000	1309.0	1309.0	1307.000000	1309.0	1309.0	1309.000000
mean	655.000000	29.503186	33.281086	0.355997	0.498854	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	2.294882	0.0	0.0	1.492731	0.0	0.0	0.261268
std	378.020061	12.905241	51.741500	0.478997	1.041658	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.837836	0.0	0.0	0.814626	0.0	0.0	0.439494
min	1.000000	0.170000	0.000000	0.000000	0.000000	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	1.000000	0.0	0.0	0.000000	0.0	0.0	0.000000
25%	328.000000	22.000000	7.895800	0.000000	0.000000	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	2.000000	0.0	0.0	1.000000	0.0	0.0	0.000000
50%	655.000000	28.000000	14.454200	0.000000	0.000000	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	3.000000	0.0	0.0	2.000000	0.0	0.0	0.000000
75%	982.000000	35.000000	31.275000	1.000000	1.000000	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	3.000000	0.0	0.0	2.000000	0.0	0.0	1.000000
max	1309.000000	80.000000	512.329200	1.000000	8.000000	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	3.000000	0.0	0.0	2.000000	0.0	0.0	1.000000

Importación de librerías

Carga del dataset

Visualización de las primeras filas

Visualización de las últimas filas

Información general del dataset

Estadísticas descriptivas

Validación de columnas

Modificación de columnas

Visualización de columnas individuales

	Passengerid	Age	Fare	Sex	sibsp	...	Pclass	Embarked	2urvived
0	1	22.0	7.2500	0	1	...	3	2.0	0
1	2	38.0	71.2833	1	1	...	1	0.0	1
2	3	26.0	7.9250	1	0	...	3	2.0	1
3	4	35.0	53.1000	1	1	...	1	2.0	1
4	5	35.0	8.0500	0	0	...	3	2.0	0

	Passengerid	Age	Fare	Sex	sibsp	...	Pclass	Embarked
1304	1305	28.0	8.0500	0	0	...	3	2.0
1305	1306	39.0	108.9000	1	0	...	1	0.0
1306	1307	38.5	7.2500	0	0	...	3	2.0
1307	1308	28.0	8.0500	0	0	...	3	2.0
1308	1309	28.0	22.3583	0	1	...	3	0.0