Skip to content

Instantly share code, notes, and snippets.

@rapartipoorna
Created August 3, 2021 17:53
Show Gist options
  • Select an option

  • Save rapartipoorna/3aa714980881f0661dceffafaf4754a0 to your computer and use it in GitHub Desktop.

Select an option

Save rapartipoorna/3aa714980881f0661dceffafaf4754a0 to your computer and use it in GitHub Desktop.
[ python script ] it will capture text files data and metadata of given folder(It might be have sub folders also) and writes into spreadsheet [Here used service account for google spreadsheet API] and creates two sheets . one for metadata and second sheet contains only URL data and corresponding file name
import pandas as pd
import os
import os.path, time
import gspread , re
regex_url = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
name = input('Please Enter Your Name : ')
Email = input('Please Enter Your office emailId : ')
directory = input('Input your directory path : ')
mainList = []
mainUrlList =[]
gc = gspread.service_account(filename='office_service_account.json')
sh = gc.open('secretData')
worksheet = sh.worksheet('Credentials')
values = worksheet.col_values(1)
for root, dirs, files in os.walk(directory):
for file in files:
innerList =[]
urlList = []
if file.endswith('.txt'):
filePath = root+'\\'+file
openFile = open(filePath, "rt")
data = openFile.read()
words = len(data.split())
lines = len(open(filePath).readlines())
size = os.stat(filePath).st_size
mod = time.ctime(os.path.getmtime(filePath))
created = time.ctime(os.path.getctime(filePath))
try:
content = pd.read_table(filePath)
content1 = str(content)
except:
content1 = 'None'
urls = re.findall(regex_url , content1)
if(len(urls)!=0):
urlList.append(file)
urlList.append(str(urls))
mainUrlList.append(urlList)
innerList.append(file)
innerList.append(size)
innerList.append(words)
innerList.append(lines)
innerList.append(created)
innerList.append(mod)
innerList.append(content1)
mainList.append(innerList)
try:
gsheet = gc.open('Emp_Notepad_Notes')
gsheet.share(Email, perm_type='user', role='writer')
gsheet.share(values[3], perm_type='user', role='writer')
except:
gsheet = gc.create('Emp_Notepad_Notes',folder_id = values[1])
gsheet.share(values[2], perm_type='user', role='writer')
gsheet.share(values[3], perm_type='user', role='writer')
gsheet.share(Email, perm_type='user', role='writer')
worksheet1=gsheet.add_worksheet(name+'_Notepad_Notes',rows=5000,cols=20,index=1)
worksheet2=gsheet.add_worksheet(name+'_Url_Files',rows=5000,cols=20,index=1)
headerRow_1 = ['filename' , 'size(Bytes)' , 'words' ,
'lines' , 'CreatedDate' , 'ModifiedDate' , 'content']
headerRow_2 =['filename','Urls']
gsheet.worksheet(name+'_Notepad_Notes').insert_row(headerRow_1)
gsheet.worksheet(name+'_Notepad_Notes').insert_rows(mainList,row=2)
gsheet.worksheet(name+'_Url_Files').insert_row(headerRow_2)
gsheet.worksheet(name+'_Url_Files').insert_rows(mainUrlList,row=2)
## Formatting header rows
worksheet1.format("A1:G1", {
"backgroundColor": {
"red": 0.0,
"green": 0.0,
"blue": 0.0
},
"horizontalAlignment": "CENTER",
"textFormat": {
"foregroundColor": {
"red": 1.0,
"green": 1.0,
"blue": 1.0
},
"fontSize": 12,
"bold": True
}
})
worksheet2.format("A1:B1", {
"backgroundColor": {
"red": 0.0,
"green": 0.0,
"blue": 0.0
},
"horizontalAlignment": "CENTER",
"textFormat": {
"foregroundColor": {
"red": 1.0,
"green": 1.0,
"blue": 1.0
},
"fontSize": 12,
"bold": True
}
})
print("Sheet with Name 'Emp_Notepad_Notes' shared with your Drive. you will not get any email notification \
please search with spreadsheetName in your drive")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment