Google BigQuery

This section shows how to connect to Google BigQuery as a data source on the Platform.

Python

# Platform Kernels: Python 2,3
# Snippet Libraries: google.cloud.bigquery==0.26.0, pandas==0.20.3

# Installation/upgrade of google.cloud.bigquery :
# !pip install --upgrade google-cloud-bigquery

import os
import pandas as pd
from google.cloud import bigquery
from google.cloud.bigquery import SchemaField


def pull_data_from_bigquery(query_string):
    """ Pull data using Google BigQuery.

    Parameters
    ----------

    query_string: string
      Your BigQuery query string.

    Returns
    -------

    Pandas DataFrame with the results of the query.

    Note:
    Assumes the following environment variables
        - GOOGLE_CLOUD_PROJECT set to the BigQuery Project ID
        - GOOGLE_CLOUD_CRED_PATH set to GCloud credential JSON file

    """
    # More details here : https://google-cloud-python.readthedocs.io/en/latest/core/modules.html
    client = bigquery.Client.from_service_account_json(os.environ['GOOGLE_CLOUD_CRED_PATH'])

    query = bigquery.query.QueryResults(query_string, client)
    query.run()
    assert query.complete, 'Query not completed'
    dat = query.fetch_data()
    df = pd.DataFrame(dat.query_result.rows,
                      columns=[x.name for x in dat.query_result.schema])

    return df

Usage Example

from bigquery import pull_data_from_bigquery

# Usage example:

# In your environment file:
# export GOOGLE_CLOUD_PROJECT='your-project-id'
# export GOOGLE_CLOUD_CRED_PATH='path/to/credentials/file.json'

query = """SELECT year, month, day, weight_pounds
           FROM [publicdata:samples.natality]
           LIMIT 5"""
results = pull_data_from_bigquery(query)

R

# RUN this in ** RStudio **

# Platform Kernels: RStudio
# Snippet Libraries: bigrquery==0.3.0, dplyr==0.5.0
#
# For more details on bigrquery, go to :
# https://github.com/rstats-db/bigrquery

# install the libraries :
install.packages("dplyr")
install.packages("bigrquery")
install.packages('httpuv')

library("bigrquery")

queryBigQuery <- function(project, queryString) {
  # Query BigQuery and returns the results of the query
  # as a data.frame.
  #
  # Args:
  #   project: project ID
  #   queryString: BigQuery query string
  #
  # Returns:
  #   dataframe of the query results.
  #
  queryResults <- query_exec(queryString, project=project)

  return(queryResults)
}

Usage Example

main <- function(){
# Usage Example:

library("bigrquery")

# Authenticate based on JSON token
set_service_token(Sys.getenv('GOOGLE_CLOUD_CRED_PATH'))

# Use your project ID here
project <- Sys.getenv('GOOGLE_CLOUD_PROJECT') # put your project ID here

# Querying BigQuery's public datasets :
queryString <- "SELECT year, month, day, weight_pounds FROM [publicdata:samples.natality] LIMIT 5"

queryResults <- queryBigQuery(project, queryString)
}