Skip to content

Instantly share code, notes, and snippets.

@JeromeGuyon
Created March 10, 2024 12:00
Show Gist options
  • Select an option

  • Save JeromeGuyon/fc6221b6749768cc5e3eb4a40b50e779 to your computer and use it in GitHub Desktop.

Select an option

Save JeromeGuyon/fc6221b6749768cc5e3eb4a40b50e779 to your computer and use it in GitHub Desktop.
# Quoted so YAML 1.1 loaders keep the format version as a string rather than
# converting it to a date.
AWSTemplateFormatVersion: "2010-09-09"
Description: >-
  Test stack for Amazon DataZone: two Glue databases with their S3 buckets,
  a DataZone domain, project, data lake environment and a Glue data source.

Parameters:
  Database1Name:
    Description: >-
      Name of the first Glue database. Underscores are replaced by hyphens
      to derive the name of its S3 bucket.
    Type: String
    Default: datazone_test1_database
  Database2Name:
    Description: >-
      Name of the second Glue database. Underscores are replaced by hyphens
      to derive the name of its S3 bucket.
    Type: String
    Default: datazone_test2_database
  DataZoneDomainName:
    Description: Name given to the DataZone domain created by this stack.
    Type: String
    Default: datazone-domain-test
Resources:
  # Glue database 1; its location is the root of Database1Bucket.
  Database1:
    Type: AWS::Glue::Database
    Properties:
      CatalogId: !Ref AWS::AccountId
      DatabaseInput:
        Name: !Ref Database1Name
        LocationUri: !Sub "s3://${Database1Bucket}"

  # Private, AES256-encrypted bucket backing Database1.
  Database1Bucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub
        - "${DatabaseName}-${AWS::AccountId}-${AWS::Region}"
        - DatabaseName: !Join [ '-', !Split [ '_', !Ref Database1Name ] ]  # replace _ with -
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        BlockPublicPolicy: true
        IgnorePublicAcls: true
        RestrictPublicBuckets: true
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256

  # Glue database 2; its location is the root of Database2Bucket.
  Database2:
    Type: AWS::Glue::Database
    Properties:
      CatalogId: !Ref AWS::AccountId
      DatabaseInput:
        Name: !Ref Database2Name
        LocationUri: !Sub "s3://${Database2Bucket}"

  # Private, AES256-encrypted bucket backing Database2.
  Database2Bucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub
        - "${DatabaseName}-${AWS::AccountId}-${AWS::Region}"
        - DatabaseName: !Join [ '-', !Split [ '_', !Ref Database2Name ] ]  # replace _ with -
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        BlockPublicPolicy: true
        IgnorePublicAcls: true
        RestrictPublicBuckets: true
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256

  # DataZone domain that owns every DataZone resource below.
  Domain:
    Type: AWS::DataZone::Domain
    Properties:
      Name: !Ref DataZoneDomainName
      Description: Cloudformation domain test
      # Unclear whether DataZone creates this role when it does not already
      # exist; this template assumes the role is already present.
      DomainExecutionRole: !Sub "arn:aws:iam::${AWS::AccountId}:role/service-role/AmazonDataZoneDomainExecution"
      SingleSignOn:
        Type: IAM_IDC
        UserAssignment: MANUAL
      Tags:
        - Key: Env
          Value: test

  # Role assumed by the DataZone service (restricted to this account and this
  # domain) and passed to the blueprint configuration as ManageAccessRoleArn.
  ManageAccessRole:
    Type: AWS::IAM::Role
    Properties:
      RoleName: !Sub
        - "AmazonDataZoneGlueAccess-${AWS::Region}-${DomainId}"
        - DomainId: !Join [ '-', !Split [ '_', !GetAtt Domain.Id ] ]
      AssumeRolePolicyDocument:
        # Quoted so the policy version stays a string, not a YAML date.
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - datazone.amazonaws.com
            Action:
              - sts:AssumeRole
            Condition:
              StringEquals:
                aws:SourceAccount: !Ref AWS::AccountId
              ArnEquals:
                aws:SourceArn: !Sub "arn:aws:datazone:${AWS::Region}:${AWS::AccountId}:domain/${Domain.Id}"
      Path: /service-role/
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AmazonDataZoneGlueManageAccessRolePolicy

  # Private, AES256-encrypted bucket used as the blueprint's S3Location.
  DatazoneAthenaBlueprintBucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: !Sub
        - "amazon-datazone-${AWS::AccountId}-${AWS::Region}-${DomainId}"
        - DomainId: !Join [ '-', !Split [ '_', !GetAtt Domain.Id ] ]
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        BlockPublicPolicy: true
        IgnorePublicAcls: true
        RestrictPublicBuckets: true
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256

  # Activate Athena Blueprint in current Region
  AthenaLocalEnvironmentProfileConfiguration:
    Type: AWS::DataZone::EnvironmentBlueprintConfiguration
    Properties:
      DomainIdentifier: !GetAtt Domain.Id
      # DefaultDataLake or DefaultDataWarehouse
      EnvironmentBlueprintIdentifier: DefaultDataLake
      EnabledRegions:
        - !Ref AWS::Region
      RegionalParameters:
        - Region: !Ref AWS::Region
          Parameters:
            S3Location: !Sub "s3://${DatazoneAthenaBlueprintBucket}"
      ManageAccessRoleArn: !GetAtt ManageAccessRole.Arn
      # This role already existed in the author's account; it may need to be
      # provisioned by the template instead.
      ProvisioningRoleArn: !Sub "arn:aws:iam::${AWS::AccountId}:role/service-role/AmazonDataZoneProvisioning-${AWS::AccountId}"

  # DataZone project that owns the environment profile, environment and
  # data source below.
  Project:
    Type: AWS::DataZone::Project
    Properties:
      DomainIdentifier: !GetAtt Domain.Id
      Name: CloudformationTestProject
      Description: Cloudformation Test Project
      GlossaryTerms:
        - sandbox
        - test

  # Environment profile targeting the current account and region, based on
  # the blueprint enabled above.
  AthenaLocalEnvironmentProfile:
    Type: AWS::DataZone::EnvironmentProfile
    Properties:
      DomainIdentifier: !GetAtt Domain.Id
      # This Environment Profile is attached to this project.
      # Profiles may be referenced cross projects
      ProjectIdentifier: !GetAtt Project.Id
      Name: CloudformationTestProjectEnvProfile
      Description: Cloudformation Test Project Environment Profile
      AwsAccountId: !Ref AWS::AccountId
      AwsAccountRegion: !Ref AWS::Region
      EnvironmentBlueprintIdentifier: !GetAtt AthenaLocalEnvironmentProfileConfiguration.EnvironmentBlueprintId

  # Environment created from the profile above, inside the project.
  AthenaLocalEnvironment:
    Type: AWS::DataZone::Environment
    Properties:
      DomainIdentifier: !GetAtt Domain.Id
      ProjectIdentifier: !GetAtt Project.Id
      Name: CloudformationTestProjectEnv
      Description: Cloudformation Test Project Environment
      EnvironmentProfileIdentifier: !GetAtt AthenaLocalEnvironmentProfile.Id

  # Note: If you create an Environment without any attached AWS::DataZone::DataSource,
  # you will face the following issue when you attach additional DataSources:
  #   CloudFormation cannot update a stack when a custom-named resource requires replacing.
  #   Rename dzd_xxxx|yyyy and update the stack again.
  #
  # You will need to rename the environment resource, which is not very handy.
  # Update: this has changed, the default data source is created automatically now.
  Datasource:
    Type: AWS::DataZone::DataSource
    Properties:
      DomainIdentifier: !GetAtt Domain.Id
      ProjectIdentifier: !GetAtt Project.Id
      EnvironmentIdentifier: !GetAtt AthenaLocalEnvironment.Id
      Name: CloudformationTestProjectDatasource
      # I don't want auto-publish to the catalog
      PublishOnImport: false
      # GenAI recommendations for business names are enabled
      Recommendation:
        EnableBusinessNameGeneration: true
      Type: GLUE
      Schedule:
        Schedule: 'cron(20 17 * * ? *)'
        # Timezone, undocumented, one of
        # ISRAEL, ASIA_SHANGHAI, EUROPE_STOCKHOLM, CST6CDT, US_EASTERN, ETC_GMT, AUSTRALIA_SYDNEY, CANADA_CENTRAL,
        # ASIA_DUBAI, UTC, ETC_GMT_NEG_0, ETC_GMT_NEG_1, ETC_GMT_NEG_2, ASIA_HONG_KONG, US_PACIFIC, ETC_GMT_NEG_3,
        # ASIA_BAHRAIN, ETC_GMT_NEG_4, ETC_GMT_NEG_5, ETC_GMT_NEG_10, ETC_GMT_NEG_6, ETC_GMT_NEG_11, ETC_GMT_NEG_7,
        # ETC_GMT_NEG_12, ETC_GMT_NEG_8, ETC_GMT_NEG_13, ETC_GMT0, ASIA_TAIPEI, US_MOUNTAIN, EUROPE_PARIS,
        # ETC_GMT_NEG_9, ETC_GMT_NEG_14, ASIA_SINGAPORE, ASIA_TOKYO, ASIA_KUALA_LUMPUR, CET, US_CENTRAL, MST7MDT,
        # EUROPE_ZURICH, EUROPE_LONDON, ETC_GMT_ADD_0, AUSTRALIA_MELBOURNE, ASIA_CALCUTTA, ETC_GMT_ADD_1,
        # AMERICA_MONTREAL, ETC_GMT_ADD_2, AMERICA_SAO_PAULO, ETC_GMT_ADD_3, ETC_GMT_ADD_10, AFRICA_JOHANNESBURG,
        # ETC_GMT_ADD_4, ETC_GMT_ADD_11, ASIA_BANGKOK, ETC_GMT_ADD_5, ETC_GMT_ADD_12, ETC_GMT_ADD_6, PACIFIC_AUCKLAND,
        # EUROPE_DUBLIN, ETC_GMT_ADD_7, ASIA_JAKARTA, MEXICO_GENERAL, ETC_GMT_ADD_8, ETC_GMT_ADD_9, ASIA_SEOUL
        Timezone: EUROPE_PARIS
      Configuration:
        GlueRunConfiguration:
          # Include every table ('*') of both Glue databases.
          RelationalFilterConfigurations:
            - DatabaseName: !Ref Database1
              FilterExpressions:
                - Expression: '*'
                  Type: 'INCLUDE'
            - DatabaseName: !Ref Database2
              FilterExpressions:
                - Expression: '*'
                  Type: 'INCLUDE'

Outputs:
  # Exposes the PortalUrl attribute of the Domain resource.
  DomainUrl:
    Description: Portal URL of the DataZone domain created by this stack.
    Value: !GetAtt Domain.PortalUrl

#create stack
#aws cloudformation create-stack --stack-name datazone-test-project --template-body datazone-test-cf.yml --capabilities CAPABILITY_IAM --capabilities CAPABILITY_NAMED_IAM
#update stack
#aws cloudformation update-stack --stack-name datazone-test-project --template-body datazone-test-cf.yml --capabilities CAPABILITY_IAM --capabilities CAPABILITY_NAMED_IAM
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment