Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Select an option

  • Save robertkraig-bambee/4dff7d453121480dd56b9bc018b65e44 to your computer and use it in GitHub Desktop.

Select an option

Save robertkraig-bambee/4dff7d453121480dd56b9bc018b65e44 to your computer and use it in GitHub Desktop.
############################################################################################################
################## #############################
################## #############################
# This Gist collection contains all the Python scripts.
################## #############################
################## #############################
############################################################################################################
import pandas as pd
def fetchFieldFromCsvFile(file):
    """Collect and print every value of the ``csv_header`` column.

    Prints the number of collected values, then the values themselves.

    Args:
        file: DataFrame that has a ``csv_header`` column.

    Returns:
        The column values as a list, in row order.
    """
    # Use a local list: the previous code appended to `Arr`, a name that is
    # not defined anywhere in the visible module, so the call raised NameError.
    values = [row.csv_header for row in file.itertuples()]
    print(len(values))
    print(values)
    return values
def bifurcateDuplicateAndUniqueValuesFromCSV(file):
    """Split the ``csv_header`` column into first occurrences and repeats.

    Example input CSV::

        csv_header
        "duplicate"
        "duplicate"
        "unique"

    The first time a value appears it is recorded as unique; every later
    appearance of the same value is recorded as a duplicate. For the example
    above, ``unique.csv`` receives ``duplicate`` and ``unique`` while
    ``duplicate.csv`` receives ``duplicate``.

    Side effects: prints the row count and the size of both lists, and
    writes ``duplicate.csv`` and ``unique.csv`` (no header, no index) using
    relative paths, i.e. into the current working directory.

    Args:
        file: DataFrame that has a ``csv_header`` column. Cell values must
            be hashable (scalars parsed from a CSV are).

    Returns:
        The list of distinct values in first-seen order.
    """
    unique = []
    duplicate = []
    # Mirror of `unique` used only for membership tests: a set lookup is
    # O(1), whereas scanning the list for every row made the loop quadratic.
    seen = set()
    count = 0
    for row in file.itertuples():
        count += 1
        value = row.csv_header
        if value in seen:
            duplicate.append(value)
        else:
            seen.add(value)
            unique.append(value)
    print(count)
    print("Number of duplicate records ", len(duplicate))
    print("Number of unique records ", len(unique))
    pd.DataFrame(duplicate).to_csv("duplicate.csv", header=False, index=False)
    pd.DataFrame(unique).to_csv("unique.csv", header=False, index=False)
    return unique
# Script section: runs on import/execution of this module.
# Load the input CSV; the relative path resolves against the current working directory.
file = pd.read_csv("csvFile.csv")
# Run the csv_header column dump over the loaded frame.
fetchFieldFromCsvFile(file)
# Split the column into duplicate.csv / unique.csv; the returned list is discarded here.
bifurcateDuplicateAndUniqueValuesFromCSV(file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment