Skip to content

Instantly share code, notes, and snippets.

@rapartipoorna
Created July 27, 2021 11:46
Show Gist options
  • Select an option

  • Save rapartipoorna/3a69251d6e2b1e760a5e25ddea602927 to your computer and use it in GitHub Desktop.

Select an option

Save rapartipoorna/3a69251d6e2b1e760a5e25ddea602927 to your computer and use it in GitHub Desktop.
[ python script ] it will capture text files data and metadata of given folder and writes into spreadsheet [Here used service account for google spreadsheet API] and creates two sheets . one for metadata and second sheet contains only URL data and corresponding file name
import pandas as pd
import os
import os.path, time
import gspread , re
regex_url = 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
name = input('Please Enter Your Name : ')
Email = input('Please Enter Your office emailId : ')
directory = input('Input your directory path : ')
mainList = []
mainUrlList =[]
gc = gspread.service_account(filename='office_service_account.json')
sh = gc.open('secretData')
worksheet = sh.worksheet('Credentials')
values = worksheet.col_values(1)
for filename in os.listdir(directory):
innerList =[]
urlList = []
filePath = directory+'\\'+filename
if filename.endswith(".txt"):
file = open(filePath, "rt")
data = file.read()
words = len(data.split())
lines = len(open(filePath).readlines())
size = os.stat(filePath).st_size
mod = time.ctime(os.path.getmtime(filePath))
created = time.ctime(os.path.getctime(filePath))
try:
content = pd.read_table(filePath)
content1 = str(content)
except:
content1 = 'None'
urls = re.findall(regex_url , content1)
if(len(urls)!=0):
urlList.append(filename)
urlList.append(str(urls))
mainUrlList.append(urlList)
innerList.append(filename)
innerList.append(size)
innerList.append(words)
innerList.append(lines)
innerList.append(created)
innerList.append(mod)
innerList.append(content1)
mainList.append(innerList)
try:
gsheet = gc.open('Emp_Notepad_Notes')
gsheet.share(Email, perm_type='user', role='writer')
gsheet.share(values[3], perm_type='user', role='writer')
except:
gsheet = gc.create('Emp_Notepad_Notes',folder_id = values[1])
gsheet.share(values[2], perm_type='user', role='writer')
gsheet.share(values[3], perm_type='user', role='writer')
gsheet.share(Email, perm_type='user', role='writer')
worksheet1=gsheet.add_worksheet(name+'_Notepad_Notes',rows=5000,cols=20,index=1)
worksheet2=gsheet.add_worksheet(name+'_Url_Files',rows=5000,cols=20,index=1)
headerRow_1 = ['filename' , 'size(Bytes)' , 'words' ,
'lines' , 'CreatedDate' , 'ModifiedDate' , 'content']
headerRow_2 =['filename','Urls']
gsheet.worksheet(name+'_Notepad_Notes').insert_row(headerRow_1)
gsheet.worksheet(name+'_Notepad_Notes').insert_rows(mainList,row=2)
gsheet.worksheet(name+'_Url_Files').insert_row(headerRow_2)
gsheet.worksheet(name+'_Url_Files').insert_rows(mainUrlList,row=2)
## Formatting header rows
worksheet1.format("A1:G1", {
"backgroundColor": {
"red": 0.0,
"green": 0.0,
"blue": 0.0
},
"horizontalAlignment": "CENTER",
"textFormat": {
"foregroundColor": {
"red": 1.0,
"green": 1.0,
"blue": 1.0
},
"fontSize": 12,
"bold": True
}
})
worksheet2.format("A1:B1", {
"backgroundColor": {
"red": 0.0,
"green": 0.0,
"blue": 0.0
},
"horizontalAlignment": "CENTER",
"textFormat": {
"foregroundColor": {
"red": 1.0,
"green": 1.0,
"blue": 1.0
},
"fontSize": 12,
"bold": True
}
})
print("Sheet with Name 'Emp_Notepad_Notes' shared with your Drive. you will not get any email notification \
please search with spreadsheetName in your drive")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment