Skip to content

Instantly share code, notes, and snippets.

@TohidN
Created February 3, 2024 14:14
Show Gist options
  • Select an option

  • Save TohidN/62a9d5bc1db2440ba4f1968606144a60 to your computer and use it in GitHub Desktop.

Select an option

Save TohidN/62a9d5bc1db2440ba4f1968606144a60 to your computer and use it in GitHub Desktop.
Get the number of lines in a file (comparing the run time of several functions)
import timeit
import csv
import pandas as pd
# Path of the data file that is passed to every talktime() call in this script.
filename = '../datasets/data.tsv'
def talktime(filename, funcname, func):
    """Time ``func(filename)`` and print the mean run time and the result.

    Args:
        filename: Path handed to ``func`` on every call.
        funcname: Label printed as the heading of this measurement.
        func: Callable that takes the path and returns a line count.

    Prints the heading, the mean elapsed time over 100 calls, the value
    returned by one additional call, and a blank separator.
    """
    runs = 100
    print(f"# {funcname}")
    # Time the callable itself rather than a generated source string: a path
    # containing quotes or backslashes would corrupt the generated statement,
    # and ``from __main__ import ...`` only resolves when the function lives
    # in the script being executed.
    t = timeit.timeit(lambda: func(filename), number=runs) / runs
    print('Elapsed time : ', t)
    print('n = ', func(filename))
    print('\n')
def mmap_reader(file_path):
    """Count the lines of ``file_path`` by reading through a memory map.

    Args:
        file_path: Path of the file to count.

    Returns:
        The number of lines in the file; 0 when the file is empty.
    """
    import mmap
    import os

    # Read-only binary access is sufficient for counting, so the file does
    # not need to be writable.
    with open(file_path, "rb") as fp:
        # mmap refuses to map a zero-length file, so answer that case here.
        if os.fstat(fp.fileno()).st_size == 0:
            return 0
        # The context manager releases the mapping once counting is done.
        with mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ) as buf:
            lines = 0
            while buf.readline():
                lines += 1
            return lines
# Time the mmap-based counter on the data file and print its result.
talktime(filename, 'mmap_reader', mmap_reader)
def sum1forline(filename):
    """Return the number of lines in ``filename`` by summing one per line.

    The file is opened as UTF-8 text and consumed lazily through a
    generator, so the lines are never held in memory together.
    """
    with open(filename, encoding="utf8") as handle:
        ones = (1 for _ in handle)
        total = sum(ones)
    return total
# Time the generator-sum counter on the data file and print its result.
talktime(filename, 'sum1forline', sum1forline)
def lenopenreadlines(filename):
    """Return the line count of ``filename`` as the length of ``readlines()``.

    The file is opened as UTF-8 text and every line is read into a list
    before the list is measured.
    """
    with open(filename, encoding="utf8") as handle:
        all_lines = handle.readlines()
    return len(all_lines)
# Time the readlines()-based counter on the data file and print its result.
talktime(filename, 'lenopenreadlines', lenopenreadlines)
def csvreaderfor(filename):
    """Count the records that ``csv.reader`` yields for ``filename``.

    Args:
        filename: Path of a UTF-8 encoded delimited text file.

    Returns:
        The number of records produced by the reader. A quoted field that
        spans several physical lines belongs to a single record, so this
        can be lower than the physical line count.
    """
    cnt = 0
    # The csv module documentation asks for newline="" so that the reader,
    # not the text layer, interprets line endings inside quoted fields.
    with open(filename, encoding="utf8", newline="") as f:
        for _ in csv.reader(f):
            cnt += 1
    return cnt
# Time the csv.reader-based counter on the data file and print its result.
talktime(filename, 'csvreaderfor', csvreaderfor)
def openenum(filename):
    """Count the lines of ``filename`` using ``enumerate`` over the file.

    Args:
        filename: Path of a UTF-8 encoded text file.

    Returns:
        The number of lines in the file; 0 when the file has no lines.
    """
    # Pre-set so that a file without lines, where the loop body never runs,
    # still returns 0.
    cnt = 0
    with open(filename, encoding="utf8") as f:
        # Numbering starts at 1, so the last index seen is the line count;
        # no separate counter is needed.
        for cnt, _ in enumerate(f, 1):
            pass
    return cnt
# Time the enumerate-based counter on the data file and print its result.
talktime(filename, 'openenum', openenum)
def lenpd(filename):
    """Count the lines of ``filename`` by loading it with ``pandas.read_csv``.

    Args:
        filename: Path of a UTF-8 encoded delimited text file.

    Returns:
        The number of data rows plus one for the header line that pandas
        consumes; 0 when the file is empty.
    """
    try:
        # Blank lines are dropped by default; keep them as rows so they are
        # included in the count.
        frame = pd.read_csv(filename, encoding="utf8", skip_blank_lines=False)
    except pd.errors.EmptyDataError:
        # pandas raises instead of returning an empty frame for a file with
        # nothing to parse.
        return 0
    # The first line is taken as the header and is not part of len(frame).
    return len(frame) + 1
# Time the pandas-based counter on the data file and print its result.
talktime(filename, 'lenpd', lenpd)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment