Creating Word Clouds on Google Colab Using Python Imaging Library (PIL)
Let’s do some creative work on Google Colab using Python scripts. The approach described in this article is for informational purposes only; if you just want to generate polished word clouds quickly, you can use a dedicated Python library such as WordCloud.
Word clouds are graphical representations of word frequency in a text, where the size of each word corresponds to its frequency. They offer a visually appealing way to analyze and visualize textual data. In this tutorial, we’ll walk through the process of generating word clouds in Python using the Python Imaging Library (PIL).
Step 1: Importing Necessary Libraries
We import the required libraries for our word cloud generation script.
import string
import random
from PIL import Image, ImageDraw, ImageFont
from google.colab import files
import io
Step 2: Defining the Word Cloud Generation Function
We define a function to generate the word cloud.
def generate_word_cloud(text, output_path, font_bytes, image_size=(800, 600), font_size_range=(20, 100), max_words=100):
    """Draw a simple word cloud for ``text`` and save it to ``output_path``.

    Words are split on whitespace, stripped of surrounding punctuation and
    lower-cased before counting. The ``max_words`` most frequent words are
    drawn on a white canvas at random positions in random colors. Font size
    grows linearly with a word's frequency across ``font_size_range``.

    Args:
        text: Input text to visualize.
        output_path: Destination passed to ``image.save``.
        font_bytes: Raw contents of a font file loadable by
            ``ImageFont.truetype``.
        image_size: ``(width, height)`` of the canvas in pixels.
        font_size_range: ``(smallest, largest)`` font size.
        max_words: Maximum number of distinct words to draw.
    """
    width, height = image_size
    min_size, max_size = font_size_range

    # Initialize an image with a white background
    image = Image.new("RGB", image_size, color="white")
    draw = ImageDraw.Draw(image)

    # Generate word frequency dictionary
    word_freq = {}
    for raw_word in text.split():
        word = raw_word.strip(string.punctuation).lower()
        if word:
            word_freq[word] = word_freq.get(word, 0) + 1

    # Keep the most frequent words; the sort is stable, so ties stay in
    # first-seen order.
    sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:max_words]

    counts = [freq for _, freq in sorted_word_freq]
    lowest = min(counts, default=0)
    spread = max(counts, default=0) - lowest

    # One font object per size: many words share a frequency, so this avoids
    # re-parsing the font bytes for every word.
    fonts = {}

    # Generate word cloud
    for word, freq in sorted_word_freq:
        # Map frequency linearly onto the font size range. When every word
        # has the same count there is nothing to scale by, so use the largest.
        if spread:
            font_size = min_size + (max_size - min_size) * (freq - lowest) // spread
        else:
            font_size = max_size
        font = fonts.get(font_size)
        if font is None:
            font = fonts[font_size] = ImageFont.truetype(io.BytesIO(font_bytes), font_size)

        # Random position, limited so the rendered word stays on the canvas
        # (falls back to 0 when the word is larger than the canvas).
        _, _, right, bottom = draw.textbbox((0, 0), word, font=font)
        x = random.randint(0, max(0, width - int(right)))
        y = random.randint(0, max(0, height - int(bottom)))

        # Random color
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

        # Draw the word on the image
        draw.text((x, y), word, fill=color, font=font)

    # Save the image
    image.save(output_path)
Step 3: Uploading Font File
We provide the text data and upload the font file.
# Sample text data
text = "Hello world this is a sample text for generating a word cloud. The word cloud will visualize the frequency of words in this text."

# Upload font file (shows Colab's file picker)
uploaded = files.upload()
if not uploaded:
    # next(iter(...)) on an empty dict would raise a bare StopIteration,
    # which gives no hint about what went wrong.
    raise ValueError("No font file was uploaded; please select a .ttf or .otf font.")

# Get the uploaded font bytes (contents of the first uploaded file)
font_bytes = uploaded[next(iter(uploaded))]

# Output image path
output_path = "word_cloud.png"
Step 4: Generating Word Cloud Image
Finally, we generate the word cloud image using the defined function.
# Render the sample text with the uploaded font and write the result to output_path
generate_word_cloud(
    text=text,
    output_path=output_path,
    font_bytes=font_bytes,
)
How to Run this Code?
Here is the full code. Paste it into a Google Colab notebook and run the cell; a file upload button will appear, and you can then select a font file from your local file system. You can download your favourite fonts from Google Fonts.
WordCloud with a white background:
import string
import random
from PIL import Image, ImageDraw, ImageFont
from google.colab import files
import io
def generate_word_cloud(text, output_path, font_bytes, image_size=(1260, 628), font_size_range=(20, 100), max_words=100):
    """Draw a simple word cloud for ``text`` and save it to ``output_path``.

    Words are split on whitespace, stripped of surrounding punctuation and
    lower-cased before counting. The ``max_words`` most frequent words are
    drawn on a white canvas at random positions in random colors. Font size
    grows linearly with a word's frequency across ``font_size_range``.

    Args:
        text: Input text to visualize.
        output_path: Destination passed to ``image.save``.
        font_bytes: Raw contents of a font file loadable by
            ``ImageFont.truetype``.
        image_size: ``(width, height)`` of the canvas in pixels.
        font_size_range: ``(smallest, largest)`` font size.
        max_words: Maximum number of distinct words to draw.
    """
    width, height = image_size
    min_size, max_size = font_size_range

    # Initialize an image with a white background
    image = Image.new("RGB", image_size, color="white")
    draw = ImageDraw.Draw(image)

    # Generate word frequency dictionary
    word_freq = {}
    for raw_word in text.split():
        word = raw_word.strip(string.punctuation).lower()
        if word:
            word_freq[word] = word_freq.get(word, 0) + 1

    # Keep the most frequent words; the sort is stable, so ties stay in
    # first-seen order.
    sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:max_words]

    counts = [freq for _, freq in sorted_word_freq]
    lowest = min(counts, default=0)
    spread = max(counts, default=0) - lowest

    # One font object per size: many words share a frequency, so this avoids
    # re-parsing the font bytes for every word.
    fonts = {}

    # Generate word cloud
    for word, freq in sorted_word_freq:
        # Map frequency linearly onto the font size range. When every word
        # has the same count there is nothing to scale by, so use the largest.
        if spread:
            font_size = min_size + (max_size - min_size) * (freq - lowest) // spread
        else:
            font_size = max_size
        font = fonts.get(font_size)
        if font is None:
            font = fonts[font_size] = ImageFont.truetype(io.BytesIO(font_bytes), font_size)

        # Random position, limited so the rendered word stays on the canvas
        # (falls back to 0 when the word is larger than the canvas).
        _, _, right, bottom = draw.textbbox((0, 0), word, font=font)
        x = random.randint(0, max(0, width - int(right)))
        y = random.randint(0, max(0, height - int(bottom)))

        # Random color
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

        # Draw the word on the image
        draw.text((x, y), word, fill=color, font=font)

    # Save the image
    image.save(output_path)
# Sample text data
text = "Hello world this is a sample text for generating a word cloud. The word cloud will visualize the frequency of words in this text."

# Upload font file (shows Colab's file picker)
uploaded = files.upload()
if not uploaded:
    # next(iter(...)) on an empty dict would raise a bare StopIteration,
    # which gives no hint about what went wrong.
    raise ValueError("No font file was uploaded; please select a .ttf or .otf font.")

# Get the uploaded font bytes (contents of the first uploaded file)
font_bytes = uploaded[next(iter(uploaded))]

# Output image path
output_path = "word_cloud.png"

# Generate word cloud image
generate_word_cloud(text, output_path, font_bytes)

# Display thumbnail. Imported under an alias so it does not rebind the global
# name Image, which generate_word_cloud needs to be PIL's Image module.
from IPython.display import Image as IPyImage
IPyImage(output_path)
Here is an updated version with a gradient background:
import string
import random
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from google.colab import files
import io
def generate_word_cloud(text, output_path, font_bytes, image_size=(1260, 628), font_size_range=(20, 100), max_words=100):
    """Draw a word cloud on a random gradient and save it to ``output_path``.

    The background is a left-to-right gradient between two random colors.
    Words are split on whitespace, stripped of surrounding punctuation and
    lower-cased before counting. The ``max_words`` most frequent words are
    drawn at random positions in random colors. Font size grows linearly
    with a word's frequency across ``font_size_range``.

    Args:
        text: Input text to visualize.
        output_path: Destination passed to ``image.save``.
        font_bytes: Raw contents of a font file loadable by
            ``ImageFont.truetype``.
        image_size: ``(width, height)`` of the canvas in pixels.
        font_size_range: ``(smallest, largest)`` font size.
        max_words: Maximum number of distinct words to draw.
    """
    width, height = image_size
    min_size, max_size = font_size_range

    # Start from a blank canvas; the gradient below paints over every column
    image = Image.new("RGB", image_size)
    # Create a draw object
    draw = ImageDraw.Draw(image)

    # Generate colored background gradient
    color1 = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
    color2 = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
    # Divide by the last column index so the final column is exactly color2;
    # max(..., 1) avoids dividing by zero on a one-pixel-wide canvas.
    last_column = max(width - 1, 1)
    for i in range(width):
        t = i / last_column
        current_color = tuple(int(color1[j] * (1 - t) + color2[j] * t) for j in range(3))
        draw.line([(i, 0), (i, height)], fill=current_color)

    # Generate word frequency dictionary
    word_freq = {}
    for raw_word in text.split():
        word = raw_word.strip(string.punctuation).lower()
        if word:
            word_freq[word] = word_freq.get(word, 0) + 1

    # Keep the most frequent words; the sort is stable, so ties stay in
    # first-seen order.
    sorted_word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:max_words]

    counts = [freq for _, freq in sorted_word_freq]
    lowest = min(counts, default=0)
    spread = max(counts, default=0) - lowest

    # One font object per size: many words share a frequency, so this avoids
    # re-parsing the font bytes for every word.
    fonts = {}

    # Generate word cloud
    for word, freq in sorted_word_freq:
        # Map frequency linearly onto the font size range. When every word
        # has the same count there is nothing to scale by, so use the largest.
        if spread:
            font_size = min_size + (max_size - min_size) * (freq - lowest) // spread
        else:
            font_size = max_size
        font = fonts.get(font_size)
        if font is None:
            font = fonts[font_size] = ImageFont.truetype(io.BytesIO(font_bytes), font_size)

        # Random position, limited so the rendered word stays on the canvas
        # (falls back to 0 when the word is larger than the canvas).
        _, _, right, bottom = draw.textbbox((0, 0), word, font=font)
        x = random.randint(0, max(0, width - int(right)))
        y = random.randint(0, max(0, height - int(bottom)))

        # Random color
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

        # Draw the word on the image
        draw.text((x, y), word, fill=color, font=font)

    # Save the image
    image.save(output_path)
# Sample text data
text = "Hello world this is a sample text for generating a word cloud. The word cloud will visualize the frequency of words in this text. In this tutorial, we’ve learned how to create animated GIFs in Google Colab using the MoviePy library. Following these simple steps, you can easily generate and share your custom animations right from your browser. Animated GIFs are a popular way to share short, looping animations to the web. With the rise of tools like Google Colab, creating animated GIFs has become more accessible than ever."

# Upload font file (shows Colab's file picker)
uploaded = files.upload()
if not uploaded:
    # next(iter(...)) on an empty dict would raise a bare StopIteration,
    # which gives no hint about what went wrong.
    raise ValueError("No font file was uploaded; please select a .ttf or .otf font.")

# Get the uploaded font bytes (contents of the first uploaded file)
font_bytes = uploaded[next(iter(uploaded))]

# Output image path
output_path = "word_cloud.png"

# Generate word cloud image
generate_word_cloud(text, output_path, font_bytes)

# Display thumbnail. Imported under an alias so it does not rebind the global
# name Image, which generate_word_cloud needs to be PIL's Image module.
from IPython.display import Image as IPyImage
IPyImage(output_path)
In this tutorial, we’ve covered the process of generating word clouds in Python using the Python Imaging Library (PIL). Following the steps outlined above, even beginners can create custom word clouds from text data and visualize word frequencies in a visually appealing manner. Experiment with different fonts, color schemes, and input texts to create unique word cloud visualizations for your projects.