Google Cloud
BigQuery
Simple Query
SELECT
fullVisitorId,
date,
v2ProductName,
pageTitle
FROM `data-to-insights.ecommerce.all_sessions`
WHERE visitId = 1501570398
ORDER BY date
Aggregate Array
SELECT
fullVisitorId,
date,
ARRAY_AGG(DISTINCT v2ProductName) AS products_viewed,
ARRAY_LENGTH(ARRAY_AGG(DISTINCT v2ProductName)) AS distinct_products_viewed,
ARRAY_AGG(DISTINCT pageTitle) AS pages_viewed,
ARRAY_LENGTH(ARRAY_AGG(DISTINCT pageTitle)) AS distinct_pages_viewed
FROM `data-to-insights.ecommerce.all_sessions`
WHERE visitId = 1501570398
GROUP BY fullVisitorId, date
ORDER BY date
Unnest (array to rows)
SELECT DISTINCT
visitId,
h.page.pageTitle
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20170801`,
UNNEST(hits) AS h
WHERE visitId = 1501570398
LIMIT 10
Struct
SELECT STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits) AS runner
Partition
CREATE OR REPLACE TABLE ecommerce.partition_by_day
PARTITION BY date_formatted
OPTIONS(
partition_expiration_days=60,
description="a table partitioned by date"
) AS
SELECT DISTINCT
PARSE_DATE("%Y%m%d", date) AS date_formatted,
fullvisitorId
FROM `data-to-insights.ecommerce.all_sessions_raw`
Alter
ALTER TABLE `covid_data.global_mobility_tracker_data`
ADD COLUMN IF NOT EXISTS population INTEGER;
ALTER TABLE `covid_data.global_mobility_tracker_data`
ADD COLUMN IF NOT EXISTS country_area FLOAT64;
ALTER TABLE `covid_data.global_mobility_tracker_data`
ADD COLUMN IF NOT EXISTS mobility STRUCT<
avg_retail FLOAT64,
avg_grocery FLOAT64,
avg_parks FLOAT64,
avg_transit FLOAT64,
avg_workplace FLOAT64,
avg_residential FLOAT64
>;
Update
UPDATE `qwiklabs-gcp-02-78163c9a3a68.covid_data.consolidate_covid_tracker_data` as c
SET country_area = cna.country_area
FROM `bigquery-public-data.census_bureau_international.country_names_area` as cna
WHERE LOWER(c.country_name) = LOWER(cna.country_name);
Window Function
Lag
WITH india_cases_by_date AS (
SELECT
date,
SUM(cumulative_confirmed) AS cases
FROM
`bigquery-public-data.covid19_open_data.covid19_open_data`
WHERE
country_name="India"
AND date between '2020-02-25' and '2020-03-13'
GROUP BY
date
ORDER BY
date ASC
)
, india_previous_day_comparison AS
(SELECT
date,
cases,
LAG(cases) OVER(ORDER BY date) AS previous_day,
cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases
FROM india_cases_by_date
)
SELECT count(*)
FROM india_previous_day_comparison
WHERE net_new_cases = 0
Dataflow
Pub/Sub to bigquery
gcloud dataflow jobs run dfjob-93241 \
--gcs-location gs://dataflow-templates-us-east1/latest/PubSub_to_BigQuery \
--region us-east1 \
--staging-location gs://qwiklabs-gcp-03-5e5fec771961/temp \
--parameters \
inputTopic=projects/<project_id>/topics/sensors-temp-39165,\
outputTableSpec=<project_id>:sensors_973.temperature_270
Last updated