Google Cloud

BigQuery

Simple Query

SELECT
  fullVisitorId,
  date,
  v2ProductName,
  pageTitle
  FROM `data-to-insights.ecommerce.all_sessions`
WHERE visitId = 1501570398
ORDER BY date

Aggregate Array

SELECT
  fullVisitorId,
  date,
  ARRAY_AGG(DISTINCT v2ProductName) AS products_viewed,
  ARRAY_LENGTH(ARRAY_AGG(DISTINCT v2ProductName)) AS distinct_products_viewed,
  ARRAY_AGG(DISTINCT pageTitle) AS pages_viewed,
  ARRAY_LENGTH(ARRAY_AGG(DISTINCT pageTitle)) AS distinct_pages_viewed
  FROM `data-to-insights.ecommerce.all_sessions`
WHERE visitId = 1501570398
GROUP BY fullVisitorId, date
ORDER BY date

Unnest (array to rows)

SELECT DISTINCT
  visitId,
  h.page.pageTitle
FROM `bigquery-public-data.google_analytics_sample.ga_sessions_20170801`,
UNNEST(hits) AS h
WHERE visitId = 1501570398
LIMIT 10

Struct

SELECT STRUCT("Rudisha" as name, [23.4, 26.3, 26.4, 26.1] as splits) AS runner

Partition

 CREATE OR REPLACE TABLE ecommerce.partition_by_day
 PARTITION BY date_formatted
 OPTIONS(
   partition_expiration_days=60,
   description="a table partitioned by date"
 ) AS

 SELECT DISTINCT
 PARSE_DATE("%Y%m%d", date) AS date_formatted,
 fullvisitorId
 FROM `data-to-insights.ecommerce.all_sessions_raw`

Alter

ALTER TABLE `covid_data.global_mobility_tracker_data`
ADD COLUMN IF NOT EXISTS population INTEGER;

ALTER TABLE `covid_data.global_mobility_tracker_data`
ADD COLUMN IF NOT EXISTS country_area FLOAT64;

ALTER TABLE `covid_data.global_mobility_tracker_data`
ADD COLUMN IF NOT EXISTS mobility STRUCT<
  avg_retail FLOAT64,
  avg_grocery FLOAT64,
  avg_parks FLOAT64,
  avg_transit FLOAT64,
  avg_workplace FLOAT64,
  avg_residential FLOAT64
>;

Update

UPDATE `qwiklabs-gcp-02-78163c9a3a68.covid_data.consolidate_covid_tracker_data` as c
SET country_area = cna.country_area
FROM `bigquery-public-data.census_bureau_international.country_names_area` as cna
WHERE LOWER(c.country_name) = LOWER(cna.country_name);

Window Function

Lag

WITH india_cases_by_date AS (
  SELECT
    date,
    SUM(cumulative_confirmed) AS cases
  FROM
    `bigquery-public-data.covid19_open_data.covid19_open_data`
  WHERE
    country_name="India"
    AND date between '2020-02-25' and '2020-03-13'
  GROUP BY
    date
  ORDER BY
    date ASC
 )

, india_previous_day_comparison AS
(SELECT
  date,
  cases,
  LAG(cases) OVER(ORDER BY date) AS previous_day,
  cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases
FROM india_cases_by_date
)

SELECT count(*)
FROM india_previous_day_comparison
WHERE net_new_cases = 0

Dataflow

Pub/Sub to bigquery

gcloud dataflow jobs run dfjob-93241 \
    --gcs-location gs://dataflow-templates-us-east1/latest/PubSub_to_BigQuery \
    --region us-east1 \
    --staging-location gs://qwiklabs-gcp-03-5e5fec771961/temp \
    --parameters \
inputTopic=projects/<project_id>/topics/sensors-temp-39165,\
outputTableSpec=<project_id>:sensors_973.temperature_270

Last updated