In [1]:
from IPython.display import HTML

HTML("""
<style>

/* ocultar la primera celda */
.jp-CodeCell:first-child {
    display:none;
}

/* tipografía general */
body {
    font-family: 'Segoe UI', Arial, sans-serif;
    line-height: 1.7;
    background:#fafafc;
}

/* títulos */
h1 {
    color:#6B8EEC;
    border-bottom:3px solid #BFD3FF;
    padding-bottom:6px;
}

h2 {
    color:#5A5A7A;
    margin-top:30px;
}

/* párrafos */
p {
    font-size:16px;
    color:#444;
}

/* bloques de código */
.jp-CodeCell pre {
    background:#f4f6ff !important;
    color:#333 !important;
    border-left:4px solid #9FB8FF;
    padding:12px;
    border-radius:6px;
}

/* resultados */
.jp-OutputArea pre {
    background:#fdf6ff;
    border-left:4px solid #D7B8FF;
    padding:10px;
    border-radius:6px;
}

/* tablas de pandas */
.dataframe {
    border-collapse: collapse !important;
    margin-top:15px;
    background:white;
    border-radius:8px;
    overflow:hidden;
}

.dataframe th {
    background:#C9D8FF !important;
    color:#333 !important;
    padding:8px;
}

.dataframe td {
    padding:8px;
    border-bottom:1px solid #eee;
}

/* cajas informativas */
.note {
    background:#EEF3FF;
    padding:15px;
    border-radius:8px;
    border-left:6px solid #9FB8FF;
}

/* listas */
ul {
    line-height:1.8;
}

</style>
""")
Out[1]:
In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, count, avg, desc

spark = SparkSession.builder.master("local").appName("Netflix").getOrCreate()

file_path = "netflix.csv"
df = spark.read.csv(file_path, header=True, inferSchema=True)

df.show()
print(df.columns)
+-------+-------+--------------------+--------------------+--------------------+--------------------+------------------+------------+------+---------+--------------------+--------------------+
|show_id|   type|               title|            director|                cast|             country|        date_added|release_year|rating| duration|           listed_in|         description|
+-------+-------+--------------------+--------------------+--------------------+--------------------+------------------+------------+------+---------+--------------------+--------------------+
|     s1|  Movie|Dick Johnson Is Dead|     Kirsten Johnson|                NULL|       United States|September 25, 2021|        2020| PG-13|   90 min|       Documentaries|As her father nea...|
|     s2|TV Show|       Blood & Water|                NULL|Ama Qamata, Khosi...|        South Africa|September 24, 2021|        2021| TV-MA|2 Seasons|International TV ...|After crossing pa...|
|     s3|TV Show|           Ganglands|     Julien Leclercq|Sami Bouajila, Tr...|                NULL|September 24, 2021|        2021| TV-MA| 1 Season|Crime TV Shows, I...|To protect his fa...|
|     s4|TV Show|Jailbirds New Orl...|                NULL|                NULL|                NULL|September 24, 2021|        2021| TV-MA| 1 Season|Docuseries, Reali...|Feuds, flirtation...|
|     s5|TV Show|        Kota Factory|                NULL|Mayur More, Jiten...|               India|September 24, 2021|        2021| TV-MA|2 Seasons|International TV ...|In a city of coac...|
|     s6|TV Show|       Midnight Mass|       Mike Flanagan|Kate Siegel, Zach...|                NULL|September 24, 2021|        2021| TV-MA| 1 Season|TV Dramas, TV Hor...|The arrival of a ...|
|     s7|  Movie|My Little Pony: A...|Robert Cullen, Jo...|Vanessa Hudgens, ...|                NULL|September 24, 2021|        2021|    PG|   91 min|Children & Family...|Equestria's divid...|
|     s8|  Movie|             Sankofa|        Haile Gerima|Kofi Ghanaba, Oya...|United States, Gh...|September 24, 2021|        1993| TV-MA|  125 min|Dramas, Independe...|On a photo shoot ...|
|     s9|TV Show|The Great British...|     Andy Devonshire|Mel Giedroyc, Sue...|      United Kingdom|September 24, 2021|        2021| TV-14|9 Seasons|British TV Shows,...|A talented batch ...|
|    s10|  Movie|        The Starling|      Theodore Melfi|Melissa McCarthy,...|       United States|September 24, 2021|        2021| PG-13|  104 min|    Comedies, Dramas|A woman adjusting...|
|    s11|TV Show|Vendetta: Truth, ...|                NULL|                NULL|                NULL|September 24, 2021|        2021| TV-MA| 1 Season|Crime TV Shows, D...|"Sicily boasts a ...|
|    s12|TV Show|    Bangkok Breaking|   Kongkiat Komesiri|Sukollawat Kanaro...|                NULL|September 23, 2021|        2021| TV-MA| 1 Season|Crime TV Shows, I...|Struggling to ear...|
|    s13|  Movie|        Je Suis Karl| Christian Schwochow|Luna Wedler, Jann...|Germany, Czech Re...|September 23, 2021|        2021| TV-MA|  127 min|Dramas, Internati...|After most of her...|
|    s14|  Movie|Confessions of an...|       Bruno Garotti|Klara Castanho, L...|                NULL|September 22, 2021|        2021| TV-PG|   91 min|Children & Family...|When the clever b...|
|    s15|TV Show|Crime Stories: In...|                NULL|                NULL|                NULL|September 22, 2021|        2021| TV-MA| 1 Season|British TV Shows,...|Cameras following...|
|    s16|TV Show|   Dear White People|                NULL|Logan Browning, B...|       United States|September 22, 2021|        2021| TV-MA|4 Seasons|TV Comedies, TV D...|"Students of colo...|
|    s17|  Movie|Europe's Most Dan...|Pedro de Echave G...|                NULL|                NULL|September 22, 2021|        2020| TV-MA|   67 min|Documentaries, In...|Declassified docu...|
|    s18|TV Show|     Falsa identidad|                NULL|Luis Ernesto Fran...|              Mexico|September 22, 2021|        2020| TV-MA|2 Seasons|Crime TV Shows, S...|Strangers Diego a...|
|    s19|  Movie|           Intrusion|          Adam Salky|Freida Pinto, Log...|                NULL|September 22, 2021|        2021| TV-14|   94 min|           Thrillers|After a deadly ho...|
|    s20|TV Show|              Jaguar|                NULL|Blanca Suárez, Iv...|                NULL|September 22, 2021|        2021| TV-MA| 1 Season|International TV ...|In the 1960s, a H...|
+-------+-------+--------------------+--------------------+--------------------+--------------------+------------------+------------+------+---------+--------------------+--------------------+
only showing top 20 rows

['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added', 'release_year', 'rating', 'duration', 'listed_in', 'description']
In [10]:
print("Total de registros:")
print(df.count())
Total de registros:
8809
In [7]:
print("Solo películas:")
df.filter(col("type") == "Movie").show()
Solo películas:
+-------+-----+--------------------+--------------------+--------------------+--------------------+------------------+------------+------+--------+--------------------+--------------------+
|show_id| type|               title|            director|                cast|             country|        date_added|release_year|rating|duration|           listed_in|         description|
+-------+-----+--------------------+--------------------+--------------------+--------------------+------------------+------------+------+--------+--------------------+--------------------+
|     s1|Movie|Dick Johnson Is Dead|     Kirsten Johnson|                NULL|       United States|September 25, 2021|        2020| PG-13|  90 min|       Documentaries|As her father nea...|
|     s7|Movie|My Little Pony: A...|Robert Cullen, Jo...|Vanessa Hudgens, ...|                NULL|September 24, 2021|        2021|    PG|  91 min|Children & Family...|Equestria's divid...|
|     s8|Movie|             Sankofa|        Haile Gerima|Kofi Ghanaba, Oya...|United States, Gh...|September 24, 2021|        1993| TV-MA| 125 min|Dramas, Independe...|On a photo shoot ...|
|    s10|Movie|        The Starling|      Theodore Melfi|Melissa McCarthy,...|       United States|September 24, 2021|        2021| PG-13| 104 min|    Comedies, Dramas|A woman adjusting...|
|    s13|Movie|        Je Suis Karl| Christian Schwochow|Luna Wedler, Jann...|Germany, Czech Re...|September 23, 2021|        2021| TV-MA| 127 min|Dramas, Internati...|After most of her...|
|    s14|Movie|Confessions of an...|       Bruno Garotti|Klara Castanho, L...|                NULL|September 22, 2021|        2021| TV-PG|  91 min|Children & Family...|When the clever b...|
|    s17|Movie|Europe's Most Dan...|Pedro de Echave G...|                NULL|                NULL|September 22, 2021|        2020| TV-MA|  67 min|Documentaries, In...|Declassified docu...|
|    s19|Movie|           Intrusion|          Adam Salky|Freida Pinto, Log...|                NULL|September 22, 2021|        2021| TV-14|  94 min|           Thrillers|After a deadly ho...|
|    s23|Movie|     Avvai Shanmughi|      K.S. Ravikumar|Kamal Hassan, Mee...|                NULL|September 21, 2021|        1996| TV-PG| 161 min|Comedies, Interna...|Newly divorced an...|
|    s24|Movie|Go! Go! Cory Cars...|Alex Woo, Stanley...|Maisie Benson, Pa...|                NULL|September 21, 2021|        2021|  TV-Y|  61 min|Children & Family...|From arcade games...|
|    s25|Movie|               Jeans|          S. Shankar|Prashanth, Aishwa...|               India|September 21, 2021|        1998| TV-14| 166 min|Comedies, Interna...|When the father o...|
|    s27|Movie|      Minsara Kanavu|         Rajiv Menon|Arvind Swamy, Kaj...|                NULL|September 21, 2021|        1997| TV-PG| 147 min|Comedies, Interna...|A tangled love tr...|
|    s28|Movie|           Grown Ups|        Dennis Dugan|Adam Sandler, Kev...|       United States|September 20, 2021|        2010| PG-13| 103 min|            Comedies|Mourning the loss...|
|    s29|Movie|          Dark Skies|       Scott Stewart|Keri Russell, Jos...|       United States|September 19, 2021|        2013| PG-13|  97 min|Horror Movies, Sc...|A family’s idylli...|
|    s30|Movie|            Paranoia|      Robert Luketic|Liam Hemsworth, G...|United States, In...|September 19, 2021|        2013| PG-13| 106 min|           Thrillers|Blackmailed by hi...|
|    s31|Movie|     Ankahi Kahaniya|Ashwiny Iyer Tiwa...|Abhishek Banerjee...|                NULL|September 17, 2021|        2021| TV-14| 111 min|Dramas, Independe...|As big city life ...|
|    s36|Movie|The Father Who Mo...|        Daniel Sandu|Adrian Titieni, E...|                NULL|September 17, 2021|        2021| TV-MA| 110 min|Dramas, Internati...|When his son goes...|
|    s37|Movie|      The Stronghold|      Cédric Jimenez|Gilles Lellouche,...|                NULL|September 17, 2021|        2021| TV-MA| 105 min|Action & Adventur...|Tired of the smal...|
|    s39|Movie| Birth of the Dragon|        George Nolfi|Billy Magnussen, ...|China, Canada, Un...|September 16, 2021|        2017| PG-13|  96 min|Action & Adventur...|A young Bruce Lee...|
|    s42|Movie|                Jaws|    Steven Spielberg|Roy Scheider, Rob...|       United States|September 16, 2021|        1975|    PG| 124 min|Action & Adventur...|When an insatiabl...|
+-------+-----+--------------------+--------------------+--------------------+--------------------+------------------+------------+------+--------+--------------------+--------------------+
only showing top 20 rows

In [8]:
print("Títulos por país:")
df.groupBy("country") \
  .agg(count("*").alias("total_titulos")) \
  .orderBy(desc("total_titulos")) \
  .show()
Títulos por país:
+--------------------+-------------+
|             country|total_titulos|
+--------------------+-------------+
|       United States|         2805|
|               India|          972|
|                NULL|          832|
|      United Kingdom|          419|
|               Japan|          245|
|         South Korea|          199|
|              Canada|          181|
|               Spain|          145|
|              France|          123|
|              Mexico|          110|
|               Egypt|          106|
|              Turkey|          105|
|             Nigeria|           93|
|           Australia|           87|
|              Taiwan|           81|
|           Indonesia|           79|
|              Brazil|           77|
|United Kingdom, U...|           75|
|         Philippines|           75|
|United States, Ca...|           73|
+--------------------+-------------+
only showing top 20 rows

In [9]:
print("Conteo por tipo:")
df.groupBy("type") \
  .agg(count("*").alias("total")) \
  .show()
Conteo por tipo:
+-------------+-----+
|         type|total|
+-------------+-----+
|         NULL|    1|
|      TV Show| 2676|
|        Movie| 6131|
|William Wyler|    1|
+-------------+-----+

In [11]:
from pyspark.sql.functions import col, count, desc

print("Top 10 directores con más títulos:")
df.filter(col("director").isNotNull()) \
  .groupBy("director") \
  .agg(count("*").alias("total_titulos")) \
  .orderBy(desc("total_titulos")) \
  .limit(10) \
  .show()
Top 10 directores con más títulos:
+--------------------+-------------+
|            director|total_titulos|
+--------------------+-------------+
|       Rajiv Chilaka|           19|
|Raúl Campos, Jan ...|           18|
|        Marcus Raboy|           16|
|         Suhas Kadav|           16|
|           Jay Karas|           14|
| Cathy Garcia-Molina|           13|
|     Youssef Chahine|           12|
|     Martin Scorsese|           12|
|         Jay Chapman|           12|
|    Steven Spielberg|           11|
+--------------------+-------------+

In [ ]: