Skip to content

Instantly share code, notes, and snippets.

@kristiyanto
kristiyanto / matcher.py
Created July 3, 2024 15:25
NLP: NER using rule-based matcher - Medium Article
# Refer to the Jupyter Notebook and article for package imports and the complete code.
# NOTE(review): indentation was lost in page extraction and the function body is
# truncated here (the pattern list is never closed) — restore 4-space indents and
# the remainder of the body from the original gist before running.
def extract_keywords(text, max_keywords=10):
# Extract candidate keywords from free text using spaCy's rule-based Matcher.
# `nlp` is presumably a spaCy pipeline loaded elsewhere — TODO confirm.
# `max_keywords` is not used in the visible portion; likely caps the result later.
doc = nlp(text)
matcher = Matcher(nlp.vocab)
# Noun and Noun Phrases
noun_phrases_patterns = [
[{'POS': 'NUM'}, {'POS': 'NOUN'}], #example: 2 bedrooms
[{'POS': 'ADJ', 'OP': '*'}, {'POS': 'NOUN'}], #example: beautiful house
@kristiyanto
kristiyanto / t5.py
Created July 3, 2024 14:41
NLP: LLM for text summarization - Medium Article
# Refer to the Jupyter Notebook and article for package imports and the complete code.
# Load the pretrained "t5-small" checkpoint once at module level
# (HuggingFace Transformers T5Tokenizer / T5ForConditionalGeneration).
model_name = "t5-small"
# legacy=False opts into the updated (non-legacy) T5 tokenizer behavior.
tokenizer = T5Tokenizer.from_pretrained(model_name, legacy=False)
model = T5ForConditionalGeneration.from_pretrained(model_name)
# NOTE(review): indentation was lost in page extraction and the body is truncated
# after the short-circuit guard — see the original gist for the generation step.
def summarize_with_t5(text, max_length=80):
# Texts already shorter than the target length are returned unchanged.
if len(text) < max_length:
return text
@kristiyanto
kristiyanto / sent_score.py
Created July 3, 2024 14:34
NLP: Text Summarization for Medium Article
# Refer to the Jupyter Notebook and article for package imports and the complete code.
# NOTE(review): indentation was lost in page extraction and the function is
# truncated after the empty-input guard — see the original gist for the
# sentence-scoring logic.
def summarize(text, char_limit=80):
# `text` appears to be an object exposing .description and .keywords
# attributes (not a plain string) — TODO confirm against the notebook.
doc = nlp(text.description)
sentences = [sent.text.strip() for sent in doc.sents]
keywords = text.keywords
# Nothing to score against: no keywords or no sentences -> empty summary.
if not keywords or not sentences:
return ""
@kristiyanto
kristiyanto / custom_lemma.py
Created July 3, 2024 14:30
NLP: Custom Lemma for Medium Article
# Refer to the Jupyter Notebook and article for package imports and the complete code.
# Base English pipeline that the custom component is registered against.
nlp = spacy.load("en_core_web_sm")
# NOTE(review): indentation was lost in page extraction and the component body is
# truncated mid-dict — see the original gist for the closing brace and the
# lemma-assignment loop / return of `doc`.
@Language.component("custom_lemma_component")
def custom_lemma_component(doc):
# Domain abbreviations mapped to full-word lemmas (real-estate listing text).
custom_lemmas = {
"br": "bedroom",
"apt": "apartment",
"st": "street",
"min": "minute",
@kristiyanto
kristiyanto / logger.ts
Created June 23, 2024 09:10
logging_with_winston_medium_2024
/**
* Transmit logs to Google Cloud Logging
*
* Setup Instructions:
* 1. Generate and Install Google Service Account Credentials:
* - Ensure the Service Account has Log Writer access.
* - Follow the instructions here: https://developers.google.com/workspace/guides/create-credentials
*
* 2. Configure env variables:
* When enabled, Logs will be transmitted to both the console and Google Cloud Logging.
# Stratified 50/50 A/B split over recently-active users.
# "Recent" = any transaction within 60 days of the newest created_date.
heartbeat = transaction.select(f.max('created_date')).collect()[0][0] - pd.Timedelta('60 days')

# One row per active user; stratum = "birth_year|device" so sampling stays
# balanced across demographic/device combinations.
all_users = (
    transaction
    .filter(f.col('created_date') > f.lit(heartbeat))
    .select(
        'user_id',
        f.concat_ws('|', 'birth_year', 'device').alias('stratum'),
        'home_city',
    )
    .distinct()
)

# Per-stratum sampling fraction of 0.5 for every stratum.
# FIX: the original used a bare `lit(0.5)`, which raises NameError unless
# `lit` was imported separately — use the module-qualified `f.lit` like the
# rest of this snippet.
fractions = (
    all_users.select('stratum').distinct()
    .withColumn('frac', f.lit(0.5))
    .rdd.collectAsMap()
)

# Group A: 50% sample from each stratum, seed fixed for reproducibility.
group_A = all_users.sampleBy('stratum', fractions, 555)
from pyspark.ml import Pipeline
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.feature import IndexToString, StringIndexer
# Collaborative-filtering model: implicit-feedback ALS over
# (user_index, context_index, count) triples. coldStartStrategy='drop'
# removes rows for unseen users/items at prediction time so RMSE stays finite.
als = ALS(
    userCol='user_index',
    itemCol='context_index',
    ratingCol='count',
    maxIter=5,
    regParam=0.01,
    implicitPrefs=True,
    nonnegative=True,
    coldStartStrategy='drop',
    seed=777,
)

# Evaluator: RMSE of predicted scores against observed counts.
rmse = RegressionEvaluator(
    metricName='rmse',
    labelCol='count',
    predictionCol='prediction',
)
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import VarianceThreshold, SelectFromModel
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.preprocessing import PowerTransformer, FunctionTransformer
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score, plot_roc_curve
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier
from sklearn.dummy import DummyClassifier
import warnings
from fintech.features import activity, demographic, location, recency
from pyspark.ml import Pipeline
# NOTE(review): indentation was lost in page extraction and the feature list is
# truncated here — see the full gist/notebook for the remaining features and the
# return value.
def generateFeatures(transaction, users):
# Assemble the per-user feature transformers; each activity.f_* factory builds
# a transformer from the transaction table (defined in fintech.features).
# `users` is not used in the visible portion — presumably consumed further down.
feature_list = [
# HISTORICAL ACTIVITIES
activity.f_user_transcAmount_min(transc_table=transaction),
activity.f_user_transcAmount_max(transc_table=transaction),
activity.f_user_transcAmount_total(transc_table=transaction),
@kristiyanto
kristiyanto / fintech_engagement_feature_module.py
Created May 28, 2021 07:50
Fintech Engagement: feature computation
class f_user_daysSinceLastTransc(Transformer):
''' Feature description: Number of days since last transaction.
'''
@keyword_only
def __init__(self, transc_table, **kwargs):
super(f_user_daysSinceLastTransc, self).__init__()
self.transc_table = transc_table
self.feature_names = "user_daysSinceLastTransc"