Web Scraping Google Search Results in Python

Scraping Google search results looks simple: send a request, parse the HTML, extract titles and links. In practice, it rarely works that way. Google SERPs are dynamic, full of changing selectors, and protected by strong anti-bot systems. A basic Requests + BeautifulSoup script won’t get you far. Let’s walk through a setup that gets past these roadblocks.
Building Your Own Google SERP Scraper
To scrape SERPs, you can either automate a real browser (this tutorial uses Selenium with Chrome) or call a dedicated Google SERP API; the sections below cover both approaches.
Code Overview
Google frequently updates its CSS selectors, so make sure to verify and update them before running the script.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd
import json
import urllib.parse

def init_driver():
    options = Options()
    driver = webdriver.Chrome(options=options)
    return driver

def build_search_url(query: str):
    encoded = urllib.parse.quote_plus(query)
    return f"https://www.google.com/search?q={encoded}"

def extract_ai_overview(driver):
    overview = ""
    try:
        block = driver.find_element(By.CSS_SELECTOR, "div[jsname='dvXlsc']")
        overview = block.text
    except Exception:
        pass
    return overview

def extract_people_also_ask(driver):
    questions = []
    try:
        paa_blocks = driver.find_elements(By.CSS_SELECTOR, "div[jsname='N760b']")
        for b in paa_blocks:
            q_el = b.find_element(By.CSS_SELECTOR, "div.JlqpRe span")
            questions.append(q_el.text)
    except Exception:
        pass
    return questions

def extract_related_searches(driver):
    related = []
    try:
        related_blocks = driver.find_elements(By.CSS_SELECTOR, "span.dg6jd.JGD2rd")
        for a in related_blocks:
            related.append(a.text)
    except Exception:
        pass
    return related

def parse_serp(driver, query, max_pages=1):
    results = []
    base_url = build_search_url(query)
    for page in range(max_pages):
        url = base_url + (f"&start={page*10}" if page > 0 else "")
        driver.get(url)
        time.sleep(15)
        try:
            container = driver.find_element(By.ID, "center_col")
        except Exception:
            continue
        blocks = container.find_elements(By.CSS_SELECTOR, "div.MjjYud")
        for block in blocks:
            try:
                title_el = block.find_element(By.CSS_SELECTOR, "h3")
                link_el = block.find_element(By.CSS_SELECTOR, "a")
                snippet_el = block.find_element(By.CSS_SELECTOR, "div.VwiC3b")
                results.append({
                    "Title": title_el.text,
                    "Link": link_el.get_attribute("href"),
                    "Snippet": snippet_el.text
                })
            except Exception:
                continue
    ai_overview = extract_ai_overview(driver)
    people_also_ask = extract_people_also_ask(driver)
    related_searches = extract_related_searches(driver)
    return {
        "organic_results": results,
        "ai_overview": ai_overview,
        "people_also_ask": people_also_ask,
        "related_searches": related_searches
    }

def save_data(data, json_filename="serp_full.json"):
    if "organic_results" in data and data["organic_results"]:
        df_organic = pd.DataFrame(data["organic_results"])
        df_organic.to_csv("organic_results.csv", index=False, encoding="utf-8")
        print(f"Saved {len(df_organic)} organic results to organic_results.csv")
    if "ai_overview" in data and data["ai_overview"]:
        df_ai = pd.DataFrame([data["ai_overview"]])
        df_ai.to_csv("ai_overview.csv", index=False, encoding="utf-8")
        print("Saved AI overview to ai_overview.csv")
    if "people_also_ask" in data and data["people_also_ask"]:
        df_paa = pd.DataFrame(data["people_also_ask"], columns=["Question"])
        df_paa.to_csv("people_also_ask.csv", index=False, encoding="utf-8")
        print(f"Saved {len(df_paa)} People Also Ask questions to people_also_ask.csv")
    if "related_searches" in data and data["related_searches"]:
        df_related = pd.DataFrame(data["related_searches"], columns=["Related_Search"])
        df_related.to_csv("related_searches.csv", index=False, encoding="utf-8")
        print(f"Saved {len(df_related)} related searches to related_searches.csv")
    with open(json_filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"Saved full SERP data to {json_filename}")

def main():
    query = "what is web scraping"
    driver = init_driver()
    try:
        data = parse_serp(driver, query, max_pages=3)
        save_data(data)
    finally:
        driver.quit()

if __name__ == "__main__":
    main()
Setup and Environment
Install the required libraries:
pip install selenium pandas
You also need Google Chrome installed; Selenium 4.6+ downloads a matching ChromeDriver automatically via Selenium Manager, so no separate driver setup is required.
Import the necessary modules:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import pandas as pd
import json
import urllib.parse
If you are new to scraping, start with our Beginner’s Guide to Web Scraping in Python.
Page Structure Analysis
Extract data from the main blocks on the page:
- AI overview (if present)
- Organic results (title, link, snippet)
- People also ask (PAA)
- Related searches
For each element, find the right CSS selector using Chrome DevTools (press F12 or right-click and Inspect).
Here are the selectors used in this project:
| Element | Selector | Description |
|---|---|---|
| AI Overview container | div[jsname='dvXlsc'] | Google AI Overview block. |
| People Also Ask card | div[jsname='N760b'] | Each expandable question card in the PAA section. |
| PAA question text | div.JlqpRe span | The visible text of the question inside a PAA card. |
| Related search item | span.dg6jd.JGD2rd | Each suggested query in the related searches section. |
| Main results container | #center_col | Google SERP's core results container. |
| Organic result block | div.MjjYud | Each individual organic search result card. |
| Result title | h3 (inside div.MjjYud) | The clickable title of an organic search result. |
| Result link | a (inside div.MjjYud) | The URL pointing to the result's website. |
| Result snippet | div.VwiC3b | The short description shown under each result title. |
Check out our tutorials on how to work with CSS selectors and XPath. If you don’t want to deal with selectors, use HasData’s Google SERP API — it delivers all results in structured JSON.
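Before wiring a selector into the scraper, it helps to confirm it still matches something on a live SERP. Here is a small sanity-check sketch that reuses the init_driver and build_search_url helpers from the full script above; keep in mind that Google may serve a CAPTCHA or different markup, so zero matches does not always mean the selector is wrong:
# Quick selector sanity check (reuses helpers from the full script above)
driver = init_driver()
driver.get(build_search_url("what is web scraping"))
time.sleep(15)  # give the page time to load

for name, selector in [
    ("organic blocks", "div.MjjYud"),
    ("PAA cards", "div[jsname='N760b']"),
    ("related searches", "span.dg6jd.JGD2rd"),
]:
    matches = driver.find_elements(By.CSS_SELECTOR, selector)
    print(f"{name}: {len(matches)} element(s) found")

driver.quit()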
Launch a Headless Browser
Set up a webdriver instance and set options:
def init_driver():
    # Initialize Chrome WebDriver with options
    options = Options()
    driver = webdriver.Chrome(options=options)
    return driver
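The options object above is created but left empty, so Chrome opens a visible window. If you want the browser to actually run headless, as the section title suggests, you can pass Chrome's headless flag before creating the driver; a minimal variant might look like this (note that headless traffic is often easier for Google to flag):
def init_driver(headless: bool = True):
    # Initialize Chrome WebDriver, optionally in headless mode
    options = Options()
    if headless:
        options.add_argument("--headless=new")          # modern Chrome headless mode
        options.add_argument("--window-size=1920,1080") # realistic viewport
    driver = webdriver.Chrome(options=options)
    return driver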
Build a search URL from the keyword:
def build_search_url(query: str):
    # Encode the query and build a Google search URL
    encoded = urllib.parse.quote_plus(query)
    return f"https://www.google.com/search?q={encoded}"
Scrape Organic Search Results
Navigate to the page, wait for it to load, and extract the organic results. As in the full script, per-result parsing is wrapped in try/except (not every div.MjjYud block contains a title, link, and snippet), and the function ends by collecting the other SERP sections and returning everything as a single dictionary:
def parse_serp(driver, query, max_pages=1):
    # Parse Google SERP organic results
    results = []
    base_url = build_search_url(query)
    for page in range(max_pages):
        # Add pagination parameter if needed (&start=10, &start=20, etc.)
        url = base_url + (f"&start={page*10}" if page > 0 else "")
        driver.get(url)
        time.sleep(15)  # Wait for the page to load (adjust as needed)
        try:
            container = driver.find_element(By.ID, "center_col")
        except Exception:
            continue  # Results container missing, skip this page
        # Extract organic result blocks
        blocks = container.find_elements(By.CSS_SELECTOR, "div.MjjYud")
        for block in blocks:
            try:
                title_el = block.find_element(By.CSS_SELECTOR, "h3")
                link_el = block.find_element(By.CSS_SELECTOR, "a")
                snippet_el = block.find_element(By.CSS_SELECTOR, "div.VwiC3b")
                results.append({
                    "Title": title_el.text,
                    "Link": link_el.get_attribute("href"),
                    "Snippet": snippet_el.text
                })
            except Exception:
                continue  # Not every div.MjjYud block is an organic result
    # Collect the remaining SERP sections (helpers defined in the next steps)
    ai_overview = extract_ai_overview(driver)
    people_also_ask = extract_people_also_ask(driver)
    related_searches = extract_related_searches(driver)
    return {
        "organic_results": results,
        "ai_overview": ai_overview,
        "people_also_ask": people_also_ask,
        "related_searches": related_searches
    }
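The fixed time.sleep(15) is simple, but it wastes time when the page loads faster and may be too short when it does not. One alternative is Selenium's explicit waits, which block only until the results container actually appears; a sketch using the same #center_col selector:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def wait_for_results(driver, timeout=15):
    # Block until #center_col is present, or raise TimeoutException
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.ID, "center_col"))
    )
You would call wait_for_results(driver) right after driver.get(url) in place of the sleep.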
Scrape AI Overview
AI overview may not always load. Use a try/except block to handle it:
def extract_ai_overview(driver):
    # Try to extract Google's AI Overview block
    overview = ""
    try:
        block = driver.find_element(By.CSS_SELECTOR, "div[jsname='dvXlsc']")
        overview = block.text
    except Exception:
        pass
    return overview
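Catching Exception keeps the scraper running, but it also hides unrelated errors. If you prefer to catch only the "element not found" case, Selenium raises NoSuchElementException for a missing element, so a stricter version of the same helper could look like this:
from selenium.common.exceptions import NoSuchElementException

def extract_ai_overview(driver):
    # Return the AI Overview text, or an empty string if the block is absent
    try:
        return driver.find_element(By.CSS_SELECTOR, "div[jsname='dvXlsc']").text
    except NoSuchElementException:
        return ""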
Scrape People Also Ask
The PAA section may not always appear, so wrap the extraction in try/except:
def extract_people_also_ask(driver):
    # Extract "People Also Ask" questions
    questions = []
    try:
        paa_blocks = driver.find_elements(By.CSS_SELECTOR, "div[jsname='N760b']")
        for b in paa_blocks:
            q_el = b.find_element(By.CSS_SELECTOR, "div.JlqpRe span")
            questions.append(q_el.text)
    except Exception:
        pass
    return questions
Scrape Related Searches
Extract related searches, if they appear:
def extract_related_searches(driver):
    # Extract "Related Searches" suggestions
    related = []
    try:
        related_blocks = driver.find_elements(By.CSS_SELECTOR, "span.dg6jd.JGD2rd")
        for a in related_blocks:
            related.append(a.text)
    except Exception:
        pass
    return related
Export Results to CSV/JSON
Save the data as JSON and store each SERP section (organic results, related searches, etc.) in separate CSV files. Print row counts for each file:
def save_data(data, json_filename="serp_full.json"):
    # Save all data into a JSON file
    with open(json_filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"Saved full SERP data to {json_filename}")
    # Save organic results to CSV
    if "organic_results" in data and data["organic_results"]:
        df_organic = pd.DataFrame(data["organic_results"])
        df_organic.to_csv("organic_results.csv", index=False, encoding="utf-8")
        print(f"Saved {len(df_organic)} organic results to organic_results.csv")
    # Save AI Overview to CSV
    if "ai_overview" in data and data["ai_overview"]:
        df_ai = pd.DataFrame([data["ai_overview"]])
        df_ai.to_csv("ai_overview.csv", index=False, encoding="utf-8")
        print("Saved AI overview to ai_overview.csv")
    # Save People Also Ask questions to CSV
    if "people_also_ask" in data and data["people_also_ask"]:
        df_paa = pd.DataFrame(data["people_also_ask"], columns=["Question"])
        df_paa.to_csv("people_also_ask.csv", index=False, encoding="utf-8")
        print(f"Saved {len(df_paa)} People Also Ask questions to people_also_ask.csv")
    # Save related searches to CSV
    if "related_searches" in data and data["related_searches"]:
        df_related = pd.DataFrame(data["related_searches"], columns=["Related_Search"])
        df_related.to_csv("related_searches.csv", index=False, encoding="utf-8")
        print(f"Saved {len(df_related)} related searches to related_searches.csv")
Alternative Solution: HasData Google Search API
HasData’s Google SERP API delivers structured JSON search data while returning region-specific results — no browser automation, selectors, proxies, or CAPTCHAs required.
Get Your API Key
To use the API, register on the HasData website and get your API key. The key is activated after email confirmation (or instantly, if you sign up with Google or GitHub).
Basic Google Search Results Scraper
Replace the API key with your own and set the request parameters before running the script.
import requests
import json
import os
from urllib.parse import urlencode
import pandas as pd

BASE_URL = "https://api.hasdata.com/scrape/google/serp"
api_key = "YOUR-API-KEY"

QUERY = "Coffee"
LOCATION = "Austin,Texas,United States"
DEVICE_TYPE = "desktop"
LANG = "en"
GL = "us"

HEADERS = {
    "Content-Type": "application/json",
    "x-api-key": api_key
}

def build_url():
    params = {}
    if QUERY:
        params["q"] = QUERY
    if LOCATION:
        params["location"] = LOCATION
    if DEVICE_TYPE:
        params["deviceType"] = DEVICE_TYPE
    if LANG:
        params["hl"] = LANG
    if GL:
        params["gl"] = GL
    return f"{BASE_URL}?{urlencode(params)}"

def fetch_data():
    url = build_url()
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        raise Exception(f"Error {response.status_code}: {response.text}")
    return response.json()

def save_csv(items, filename):
    if not items:
        return
    df = pd.DataFrame(items)
    df.to_csv(filename, index=False, encoding="utf-8")
    print(f"{filename} saved, {len(df)} rows")

def save_json(data, filename):
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"{filename} saved")

def main():
    data = fetch_data()
    save_csv(data.get("organicResults"), "organic_results.csv")
    local_places = data.get("localResults", {}).get("places")
    save_csv(local_places, "local_places.csv")
    save_csv(data.get("relatedSearches"), "related_searches.csv")
    paa = [{"question": q["question"]} for q in data.get("relatedQuestions", [])]
    save_csv(paa, "people_also_ask.csv")
    kg = data.get("knowledgeGraph", {})
    if kg:
        main_info = {k: v for k, v in kg.items() if k != "nutritionInformation" and k != "headerImages"}
        save_csv([main_info], "knowledge_graph.csv")
        nutrition = kg.get("nutritionInformation")
        if nutrition:
            nutrients = nutrition.get("nutrient", {})
            save_csv([{"description": nutrition.get("description"), **nutrients}], "nutrition.csv")
        save_csv(kg.get("headerImages"), "knowledge_graph_images.csv")
    save_csv(data.get("perspectives"), "perspectives.csv")
    save_json(data, "full_serp.json")

if __name__ == "__main__":
    main()
Import Libraries
Import the required libraries:
import requests
import json
import os
from urllib.parse import urlencode
import pandas as pd
Set Parameters
Set your API key and the list of desired parameters (you can find the full list of available Google SERP API parameters in the documentation).
# API base URL
BASE_URL = "https://api.hasdata.com/scrape/google/serp"
api_key = "YOUR-API-KEY"

# Optional parameters (leave empty if not needed)
QUERY = "Coffee"
LOCATION = "Austin,Texas,United States"
DEVICE_TYPE = "desktop"
LANG = "en"
GL = "us"

# API headers
HEADERS = {
    "Content-Type": "application/json",
    "x-api-key": api_key
}
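The script imports os but never uses it. One common pattern, and an optional tweak rather than anything the API requires, is to read the key from an environment variable instead of hard-coding it; the HASDATA_API_KEY variable name below is just an example:
# Read the key from the environment, falling back to the placeholder
api_key = os.getenv("HASDATA_API_KEY", "YOUR-API-KEY")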
Make a Request
Build the API request URL from the parameters (some may be optional or missing).
# Build request URL
def build_url():
    params = {}
    if QUERY:
        params["q"] = QUERY
    if LOCATION:
        params["location"] = LOCATION
    if DEVICE_TYPE:
        params["deviceType"] = DEVICE_TYPE
    if LANG:
        params["hl"] = LANG
    if GL:
        params["gl"] = GL
    return f"{BASE_URL}?{urlencode(params)}"
Send the request and receive a JSON response with the search results:
# Get JSON data from API
def fetch_data():
    url = build_url()
    response = requests.get(url, headers=HEADERS)
    if response.status_code != 200:
        raise Exception(f"Error {response.status_code}: {response.text}")
    return response.json()
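A quick way to see what came back before saving anything is to fetch once and inspect the top-level keys; organicResults is the key used in the saving step below:
data = fetch_data()
print(list(data.keys()))                    # top-level sections in the response
print(len(data.get("organicResults", [])))  # number of organic results returned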
Process and Save SERP Sections
Add universal functions to save the data as JSON or CSV:
# Save CSV
def save_csv(items, filename):
    if not items:
        return
    df = pd.DataFrame(items)
    df.to_csv(filename, index=False, encoding="utf-8")
    print(f"{filename} saved, {len(df)} rows")

# Save JSON
def save_json(data, filename):
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    print(f"{filename} saved")
Example sections to parse and save:
- Organic results
- Local places
- Related searches
- People also ask (related questions)
- Knowledge graph (main and additional info, header images)
- Perspectives
These appear in the sample response, but you can extend the script to parse all the available sections listed in the Google SERP API documentation.
def main():
    data = fetch_data()

    # Organic results
    save_csv(data.get("organicResults"), "organic_results.csv")

    # Local places
    local_places = data.get("localResults", {}).get("places")
    save_csv(local_places, "local_places.csv")

    # Related searches
    save_csv(data.get("relatedSearches"), "related_searches.csv")

    # People Also Ask
    paa = [{"question": q["question"]} for q in data.get("relatedQuestions", [])]
    save_csv(paa, "people_also_ask.csv")

    # Knowledge Graph
    kg = data.get("knowledgeGraph", {})
    if kg:
        # Save main info
        main_info = {k: v for k, v in kg.items() if k != "nutritionInformation" and k != "headerImages"}
        save_csv([main_info], "knowledge_graph.csv")

        # Save nutrition info
        nutrition = kg.get("nutritionInformation")
        if nutrition:
            nutrients = nutrition.get("nutrient", {})
            save_csv([{"description": nutrition.get("description"), **nutrients}], "nutrition.csv")

        # Save header images
        save_csv(kg.get("headerImages"), "knowledge_graph_images.csv")

    # Perspectives
    save_csv(data.get("perspectives"), "perspectives.csv")

    # Full JSON
    save_json(data, "full_serp.json")
Pick the Method That Works Best for You
Scraping Google search results with a browser requires constant selector updates and anti-bot handling. The Google SERP API gives you ready-to-use JSON, so you don’t need to parse HTML or handle captchas. You can also pick the region you want, since the API uses proxies for localization.
