Gemini integrates directly with BigQuery through generative AI functions, enabling you to perform sophisticated data analysis using natural language. Query your data, generate insights, forecast trends, and extract structured information—all within familiar SQL.
# Look up the service account that backs the BigQuery connection
SERVICE_ACCOUNT=$(bq show --connection --location=us genai_connection \
  --format=json | jq -r '.cloudResource.serviceAccountId')

# Allow that service account to call Vertex AI models
gcloud projects add-iam-policy-binding PROJECT_ID \
  --member="serviceAccount:${SERVICE_ACCOUNT}" \
  --role="roles/aiplatform.user"
-- Ask Gemini to produce a structured summary table directly from sales rows.
-- NOTE(review): AI.GENERATE_TABLE may also require an output schema argument
-- depending on BigQuery version — confirm against current docs.
SELECT *
FROM AI.GENERATE_TABLE(
    MODEL `project.dataset.gemini_model`,
    TABLE `project.dataset.sales_data`,
    STRUCT(
        '''Analyze these sales transactions and create a summary table with columns: category, total_sales, avg_order_value, top_product, growth_rate''' AS prompt,
        0.2 AS temperature  -- low temperature for consistent, factual output
    )
);
-- Flag each contact's email with a boolean AI classification.
SELECT
    email,
    AI.GENERATE_BOOL(
        MODEL `project.dataset.gemini_model`,
        CONCAT('Is this a valid business email? ', email)
    ) AS is_business_email
FROM `project.dataset.contacts`;
-- Extract a typed rating and a typed probability from free-text reviews.
SELECT
    review_text,
    AI.GENERATE_INT(
        MODEL `project.dataset.gemini_model`,
        CONCAT('Rate this review from 1-5 stars: ', review_text)
    ) AS star_rating,
    AI.GENERATE_DOUBLE(
        MODEL `project.dataset.gemini_model`,
        CONCAT('Estimate purchase likelihood 0-1: ', review_text)
    ) AS purchase_probability
FROM `project.dataset.reviews`;
# Natural-language-to-SQL: send a schema description plus a user question to
# Gemini and print the generated BigQuery SQL.
#
# The original snippet had its statements and triple-quoted string literals
# collapsed onto a single line, which is not runnable Python; statement
# boundaries and multi-line literals are reconstructed here.
from google import genai
from google.genai.types import GenerateContentConfig

# NOTE(review): PROJECT_ID and LOCATION are assumed to be defined earlier in
# the surrounding tutorial — confirm before running standalone.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

# Minimal schema description given to the model as grounding context.
table_schema = """Tables:
- sales (date, product_id, amount, quantity, region)
- products (product_id, name, category, price)
- customers (customer_id, name, region, signup_date)
"""

user_question = "What were the top 5 products by revenue in California last month?"

prompt = f"""Generate a BigQuery SQL query to answer this question:
{user_question}

Schema:
{table_schema}

Rules:
- Use Standard SQL syntax
- Include table aliases
- Add comments
- Format for readability"""

# temperature=0 for deterministic SQL generation.
response = client.models.generate_content(
    model="gemini-2.0-flash",
    contents=prompt,
    config=GenerateContentConfig(temperature=0),
)
print(response.text)
Output:
-- Top 5 products by revenue in California last month
SELECT
    p.product_id,
    p.name AS product_name,
    p.category,
    SUM(s.amount) AS total_revenue,
    SUM(s.quantity) AS units_sold
FROM `project.dataset.sales` AS s
INNER JOIN `project.dataset.products` AS p
    ON s.product_id = p.product_id
WHERE
    s.region = 'California'
    -- Half-open range covering the full previous calendar month:
    -- [first day of last month, first day of this month).
    -- The original lower bound DATE_SUB(CURRENT_DATE(), INTERVAL 1 MONTH)
    -- started mid-month (e.g. Feb 15 when run Mar 15), silently truncating
    -- the window.
    AND s.date >= DATE_TRUNC(DATE_SUB(CURRENT_DATE(), INTERVAL 1 MONTH), MONTH)
    AND s.date < DATE_TRUNC(CURRENT_DATE(), MONTH)
GROUP BY p.product_id, p.name, p.category
ORDER BY total_revenue DESC
LIMIT 5;
-- Enrich customer data with AI-generated insights
CREATE OR REPLACE TABLE `project.dataset.enriched_customers` AS
-- Aggregate each customer's order history into the signals fed to the model.
WITH customer_activity AS (
    SELECT
        c.customer_id,
        c.name,
        COUNT(DISTINCT o.order_id) AS total_orders,
        SUM(o.amount) AS lifetime_value,
        STRING_AGG(DISTINCT p.category) AS purchased_categories
    FROM `project.dataset.customers` c
    LEFT JOIN `project.dataset.orders` o
        ON c.customer_id = o.customer_id
    LEFT JOIN `project.dataset.products` p
        ON o.product_id = p.product_id
    GROUP BY c.customer_id, c.name
)
SELECT
    customer_id,
    name,
    total_orders,
    lifetime_value,
    -- Structured JSON output enforced via response_schema so downstream
    -- queries can parse segment / churn_risk / recommendations reliably.
    AI.GENERATE(
        MODEL `project.dataset.gemini_model`,
        CONCAT(
            'Create a customer profile and recommendations. ',
            'Orders: ', CAST(total_orders AS STRING), ', ',
            'LTV: $', CAST(lifetime_value AS STRING), ', ',
            'Categories: ', purchased_categories
        ),
        STRUCT(
            JSON''' { "type": "OBJECT", "properties": { "segment": {"type": "STRING"}, "churn_risk": {"type": "STRING"}, "recommendations": {"type": "ARRAY", "items": {"type": "STRING"}}, "next_best_action": {"type": "STRING"} } }''' AS response_schema,
            'application/json' AS response_mime_type
        )
    ) AS customer_insights
FROM customer_activity;
-- Batch process for better performance
-- NOTE(review): confirm function name — depending on BigQuery version the
-- per-row generation function is ML.GENERATE_TEXT or AI.GENERATE; verify
-- AI.GENERATE_TEXT against current docs.
CREATE OR REPLACE TABLE `project.dataset.processed_data` AS
SELECT
    *,
    AI.GENERATE_TEXT(
        MODEL `project.dataset.gemini_model`,
        prompt,
        STRUCT(0.2 AS temperature, 100 AS max_output_tokens)
    ) AS generated_text
FROM (
    -- Pre-filter and prepare data: skip NULL/short texts and cap prompt size
    -- before paying per-row model-call cost.
    SELECT
        id,
        CONCAT('Summarize: ', SUBSTR(text, 1, 500)) AS prompt
    FROM `project.dataset.large_table`
    WHERE text IS NOT NULL
        AND LENGTH(text) > 100
);

-- Use incremental processing for large datasets
-- NOTE(review): this MERGE inserts new_records rows as-is; if new_records
-- lacks the generated_text column the INSERT ROW will fail, and new rows
-- would not get AI output either way — confirm intended pipeline.
MERGE `project.dataset.processed_data` T
USING (
    SELECT * FROM `project.dataset.new_records`
) S
ON T.id = S.id
WHEN NOT MATCHED THEN
    INSERT ROW;