Skip to content

GREAT_AMERICA Datatape

Generates a datatape for GA portfolios, driven by a local S1 datatape file. Includes system information, payment data, billing details, and portfolio performance metrics.

Overview

Generation Frequency: Monthly Development Status: Stable

Key Features: - Equipment finance tracking - Lease portfolio analytics - Partnership data management

Key Stakeholders: - Treasury Reporting Team

Output Columns

The following columns are included in the final datatape output (filtered from SQL queries based on configuration):

Column Analysis Summary

  • Total columns found in SQL queries: 79
  • Columns specified in output_columns config: 57
  • Columns found in SQL and included in output: 9
  • Missing columns (in config but not found in SQL): 48

Note: Only columns that exist in SQL queries are included in the final output.

Column Name Required Source Query Source Table Source Field Derived Transformations
Estimated Production by Year Yes ga_systems sfdc_staging.vw_quote__c Estimated Production by Year No
Guaranteed Production by Year Yes ga_systems sfdc_staging.vw_quote__c Guaranteed Production by Year No
Quote: Contract Type Yes ga_systems sfdc_staging.vw_system__c Quote: Contract Type No
Quote: Installation State Yes ga_systems sfdc_staging.vw_system__c Quote: Installation State No
Quote: Payment Escalator Yes ga_systems sfdc_staging.vw_quote__c Quote: Payment Escalator No
Quote: Solar Rate Yes ga_systems sfdc_staging.vw_quote__c Quote: Solar Rate No
Quote: Term (months) Yes ga_systems sfdc_staging.vw_quote__c Quote: Term (months) No
Quote: Term (years) Yes ga_systems sfdc_staging.vw_quote__c Quote: Term (years) No
Sunnova System ID Yes ga_systems sfdc_staging.vw_system__c Sunnova System ID No

Note: Columns marked as 'Derived' are calculated or transformed rather than directly selected from the source table.

Data Sources

The configuration pulls data from the following sources:

Source Name Type Query/File Description
ach database ach.sql SQL query for ach data
pbi database pbi.sql SQL query for pbi data
ga_systems database ga_systems.sql SQL query for ga_systems data
billing database billing.sql SQL query for billing data
payment_data database payment_data.sql SQL query for payment_data data

Configuration

Database Configuration

  • dbcreds_environment: fusionods

Paths

  • sql_dir: ./sql
  • cache_dir: ./query_cache
  • output_dir: ./completed_output
  • log_dir: ./logs
  • data_dir: C:/Users/thandolwethu.dlamini/OneDrive - Sunnova Energy International/Documents - FinOps/Treasury/Reporting/S1 Datatape

SQL Queries

-- file: sql/ach.sql
SELECT
    ap."Payment Method Name",
    s."System Name",
    c."Full Name (1)",
    rt."Record Type Name",
    ap."AutoPay"
FROM
    sfdc_staging.vw_ach_payment_method__c AS ap
JOIN
    sfdc_staging.vw_contact AS c ON ap."Billing Contact ID18" = c."Contact ID"
JOIN
    sfdc_staging.vw_system__c AS s ON s."Billing Contact" = ap."Billing Contact ID18"
JOIN
    sfdc_staging.vw_recordtype AS rt ON ap."Record Type ID" = rt."Record Type ID"
WHERE
    ap."AutoPay" = TRUE
    AND s."System Name" IN :system_names;
-- file: sql/pbi.sql
SELECT
    sp."Sunnova System ID",
    sp."Rebate Reservation Number",
    s."Asset Portfolio - Customer",
    s."Contract Type",
    q."SREC Owner",
    sp."Stage",
    s."Amount",
    s."System Size",
    s."Installation Address State",
    q."Expected Production - Guaranteed",
    s."Status",
    sp."InService Date",
    sp."PTO Received Date",
    q."Created Date"
FROM
    sfdc_staging.vw_system__c AS s
JOIN
    sfdc_staging.vw_system_project__c AS sp ON s."Record ID" = sp."System"
JOIN
    sfdc_staging.vw_quote__c AS q ON sp."Quote" = q."Record ID"
WHERE
    s."System Name" IN :system_names
    AND sp."Stage" IN ('Contract', 'NTP', 'Substantial', 'Final', 'Completed')
    AND s."Installation Address State" = 'CT'
    AND s."Status" NOT IN ('Retired', 'Cancelled', 'Pending Cancellation')
    AND (q."SREC Owner" != 'Customer' OR q."SREC Owner" IS NULL);
-- file: sql/ga_systems.sql
SELECT
    s."Asset Portfolio - Partner",
    s."Asset Portfolio - Customer",
    s."Customer Account ID",
    s."System Name" AS "Sunnova System ID", -- Alias for consistent joining
    sp."Committed Capital",
    s."Contract Type" as "Quote: Contract Type",
    s."System Size",
    s."Monthly Payment Amount",
    q."Recurring Payment",
    q."Expected Production - Orig Simulation" as "Estimated Production by Year",
    q."Expected Production - Guaranteed" as "Guaranteed Production by Year",
    q."Solar Rate (1)" as "Quote: Solar Rate",
    q."Payment Escalator" as "Quote: Payment Escalator",
    q."Term (months)" as "Quote: Term (years)",
    q."Term (years)" as "Quote: Term (months)",
    sp."Stage",
    s."Installation State" as "Quote: Installation State",
    s."Installation Address City",
    q."Origination Partner Account",
    s."Primary Customer Name",
    q."Primary Lessee Middle Name",
    s."Installation Address",
    s."Installation Address Line 2",
    q."Primary Customer Email",
    q."Primary Customer Phone",
    sp."Contract Stage Date",
    sp."NTP Stage Date",
    sp."Substantial Stage Date",
    sp."Final Stage Date",
    sp."InService Date",
    sp."Completed Stage Date",
    sp."PTO Received Date",
    s."Latest Customer Payment Date",
    s."Next Payment Date",
    s."Latest Invoice Date",
    sp."NTP Stage Payment",
    sp."Substantial Stage Payment",
    sp."Final Stage Payment",
    q."Total EPC",
    q."Per Kwh Cost"
FROM
    sfdc_staging.vw_system__c AS s
JOIN
    sfdc_staging.vw_system_project__c AS sp ON s."Record ID" = sp."System"
JOIN
    sfdc_staging.vw_quote__c AS q ON sp."Quote" = q."Record ID"
WHERE
    s."System Name" IN :system_names
    AND s."Contract Type" IN :contract_types
    AND sp."Stage" NOT IN :stages_to_exclude;
-- file: sql/billing.sql
SELECT
    s."System Name",
    sp."Period No.",
    sp."Service Period ID",
    sp."Start Date",
    sp."End Date",
    s."Record ID",
    s."Asset Portfolio - Customer"
FROM
    sfdc_staging.vw_system__c AS s
JOIN
    sfdc_staging.vw_service_period__c AS sp ON s."Record ID" = sp."System"
WHERE
    s."System Name" IN :system_names
    AND sp."Start Date" >= :start_date
    AND sp."Start Date" < :end_date;
-- file: sql/payment_data.sql
SELECT
    s."Asset Portfolio - Customer",
    s."Asset Portfolio - Partner",
    s."System Name",
    s."Contract Type",
    aa."Year (2)",
    aa."Initial Monthly Payment",
    aa."Monthly Payment",
    aa."Contract Prepayment Amount",
    aa."ACH PPA Rate (kWh)",
    aa."NonACH PPA Rate (kWh)",
    aa."Estimated Monthly Bill NonACH",
    q."Created Date",
    s."CA - PUC Filing Date"
FROM
    sfdc_staging.vw_system__c AS s
JOIN
    sfdc_staging.vwx_quote__c AS q ON s."Quote" = q."Record ID"
JOIN
    sfdc_staging.vw_annual_attributes__c AS aa ON q."Record ID" = aa."Quote"
WHERE
    s."System Name" IN :system_names
    AND aa."Year (2)" = 1;

Data Processing

Data processing is handled by: datatapes/great_america/run_great_america.py

Python Modules

This datatape includes additional Python modules with business logic:

profile_ga

profile_output_data(df: pd.DataFrame) -> None

Profile the output data file using skimpy and display key statistics and potential issues.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing the data to profile

required
Source code in datatapes/great_america/profile_ga.py
def profile_output_data(df: pd.DataFrame) -> None:
    """
    Profile the output data file using skimpy and display key statistics and potential issues.

    Args:
        df: DataFrame containing the data to profile
    """
    try:
        # Suppress warnings
        warnings.filterwarnings('ignore')

        # Initialize console
        console = Console()

        # Read the CSV file
        console.print("\n[bold blue]Loading Data...[/bold blue]")
        # 

        # Basic Dataset Info
        console.print("\n[bold blue]Dataset Overview[/bold blue]")
        console.print(f"Total Rows: {len(df):,}")
        console.print(f"Total Columns: {len(df.columns):,}")

        # Generate comprehensive profile using skimpy
        console.print("\n[bold blue]Detailed Data Profile[/bold blue]")
        profile = skim(df)
        console.print(profile)

        # Additional Custom Analysis for Installation Addresses
        if 'Installation Address City' in df.columns:
            console.print("\n[bold blue]Installation Address Analysis[/bold blue]")

            # Count unique cities
            cities = df['Installation Address City'].value_counts()
            unknown_count = cities.get('UNKNOWN', 0)
            unknown_percentage = (unknown_count / len(df)) * 100

            console.print(f"Total UNKNOWN cities: {unknown_count:,} ({unknown_percentage:.2f}%)")

            # Create table for sample addresses
            table = Table(title="Sample Addresses")
            table.add_column("Installation Address", style="cyan")
            table.add_column("City", style="green")
            table.add_column("State", style="yellow")

            # Sample both UNKNOWN and known cities
            sample_size = 5
            if unknown_count > 0:
                unknown_samples = df[df['Installation Address City'] == 'UNKNOWN'].sample(
                    min(sample_size, unknown_count)
                )
                console.print("\n[bold]Sample UNKNOWN Cities:[/bold]")
                for _, row in unknown_samples.iterrows():
                    table.add_row(
                        str(row.get('Installation Address', '')),
                        str(row.get('Installation Address City', '')),
                        str(row.get('Quote: Installation State', ''))
                    )

            known_samples = df[df['Installation Address City'] != 'UNKNOWN'].sample(
                min(sample_size, len(df) - unknown_count)
            )
            console.print("\n[bold]Sample Known Cities:[/bold]")
            for _, row in known_samples.iterrows():
                table.add_row(
                    str(row.get('Installation Address', '')),
                    str(row.get('Installation Address City', '')),
                    str(row.get('Quote: Installation State', ''))
                )

            console.print(table)



    except Exception as e:
        console.print(f"[bold red]Error profiling data: {str(e)}[/bold red]")