This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Refer to the Jupyter Notebook and article for package imports and the complete code. | |
| def extract_keywords(text, max_keywords=10): | |
| doc = nlp(text) | |
| matcher = Matcher(nlp.vocab) | |
| # Noun and Noun Phrases | |
| noun_phrases_patterns = [ | |
| [{'POS': 'NUM'}, {'POS': 'NOUN'}], #example: 2 bedrooms | |
| [{'POS': 'ADJ', 'OP': '*'}, {'POS': 'NOUN'}], #example: beautiful house |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Refer to the Jupyter Notebook and article for package imports and the complete code. | |
# One checkpoint name drives both loaders below, so the tokenizer and the
# model weights always come from the same pretrained checkpoint.
model_name = "t5-small"

# legacy=False is passed through to the tokenizer loader unchanged.
tokenizer = T5Tokenizer.from_pretrained(
    model_name,
    legacy=False,
)
model = T5ForConditionalGeneration.from_pretrained(model_name)
| def summarize_with_t5(text, max_length=80): | |
| if len(text) < max_length: | |
| return text |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Refer to the Jupyter Notebook and article for package imports and the complete code. | |
| def summarize(text, char_limit=80): | |
| doc = nlp(text.description) | |
| sentences = [sent.text.strip() for sent in doc.sents] | |
| keywords = text.keywords | |
| if not keywords or not sentences: | |
| return "" | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Refer to the Jupyter Notebook and article for package imports and the complete code. | |
| nlp = spacy.load("en_core_web_sm") | |
| @Language.component("custom_lemma_component") | |
| def custom_lemma_component(doc): | |
| custom_lemmas = { | |
| "br": "bedroom", | |
| "apt": "apartment", | |
| "st": "street", | |
| "min": "minute", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Transmit logs to Google Cloud Logging | |
| * | |
| * Setup Instructions: | |
| * 1. Generate and Install Google Service Account Credentials: | |
| * - Ensure the Service Account has Log Writer access. | |
| * - Follow the instructions here: https://developers.google.com/workspace/guides/create-credentials | |
| * | |
| * 2. Configure env variables: | |
| * When enabled, logs will be transmitted to both the console and Google Cloud Logging. | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Look-back cutoff: 60 days before the most recent `created_date` in the
# transaction table.
heartbeat = (
    transaction.select(f.max('created_date')).collect()[0][0]
    - pd.Timedelta('60 days')
)

# Distinct (user_id, stratum, home_city) rows among transactions newer than
# the cutoff. The stratum key is "birth_year|device".
all_users = (
    transaction
    .filter(f.col('created_date') > f.lit(heartbeat))
    .select(
        'user_id',
        f.concat_ws('|', 'birth_year', 'device').alias('stratum'),
        'home_city',
    )
    .distinct()
)

# Map every stratum to the same sampling fraction (0.5).
# Uses `f.lit` like the rest of this snippet: a bare `lit` is only defined if
# it was imported separately, and would otherwise raise NameError.
fractions = (
    all_users
    .select('stratum')
    .distinct()
    .withColumn('frac', f.lit(0.5))
    .rdd
    .collectAsMap()
)

# Stratified sample of users, keyed on 'stratum', with a fixed seed (555).
group_A = all_users.sampleBy('stratum', fractions, 555)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from pyspark.ml import Pipeline | |
| from pyspark.ml.recommendation import ALS | |
| from pyspark.ml.evaluation import RegressionEvaluator | |
| from pyspark.ml.feature import IndexToString, StringIndexer | |
| # Model | |
# ALS recommender configured for implicit feedback: users come from
# 'user_index', items from 'context_index', and the rating column is 'count'.
# Rows that cannot be scored are dropped (coldStartStrategy='drop').
als = ALS(
    userCol='user_index',
    itemCol='context_index',
    ratingCol='count',
    implicitPrefs=True,
    nonnegative=True,
    coldStartStrategy='drop',
    maxIter=5,
    regParam=0.01,
    seed=777,
)

# RMSE between the 'prediction' column and the 'count' label.
# NOTE(review): with implicitPrefs=True the predictions may not be on the same
# scale as raw counts -- confirm RMSE against 'count' is the intended metric.
rmse = RegressionEvaluator(
    labelCol='count',
    predictionCol='prediction',
    metricName='rmse',
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from sklearn.pipeline import Pipeline | |
| from sklearn.feature_selection import VarianceThreshold, SelectFromModel | |
| from sklearn.model_selection import GridSearchCV, StratifiedKFold | |
| from sklearn.preprocessing import PowerTransformer, FunctionTransformer | |
| from sklearn.compose import ColumnTransformer, make_column_transformer | |
| from sklearn.metrics import roc_auc_score, precision_score, recall_score, accuracy_score, plot_roc_curve | |
| from sklearn.ensemble import RandomForestClassifier | |
| from sklearn.linear_model import RidgeClassifier | |
| from sklearn.dummy import DummyClassifier | |
| import warnings |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from fintech.features import activity, demographic, location, recency | |
| from pyspark.ml import Pipeline | |
| def generateFeatures(transaction, users): | |
| feature_list = [ | |
| # HISTORICAL ACTIVITIES | |
| activity.f_user_transcAmount_min(transc_table=transaction), | |
| activity.f_user_transcAmount_max(transc_table=transaction), | |
| activity.f_user_transcAmount_total(transc_table=transaction), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class f_user_daysSinceLastTransc(Transformer): | |
| ''' Feature description: Number of days since last transaction. | |
| ''' | |
| @keyword_only | |
| def __init__(self, transc_table, **kwargs): | |
| super(f_user_daysSinceLastTransc, self).__init__() | |
| self.transc_table = transc_table | |
| self.feature_names = "user_daysSinceLastTransc" |