Last active
July 15, 2021 09:59
-
-
Save a9QrX3Lu/40d855bf43ce2132522e1bce85346899 to your computer and use it in GitHub Desktop.
Generate plot for a dataset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| import os | |
| import sys | |
| import argparse | |
| import matplotlib.axes | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import pandas as pd | |
| from typing import Dict, Any | |
# Hex colour palette, one entry per line. It has twelve entries; the last two
# repeat the first two.
COLORS = [
    "#0780cf",
    "#765005",
    "#fa6d1d",
    "#0e2c82",
    "#b6b51f",
    "#da1f18",
    "#701866",
    "#f47a75",
    "#009db2",
    "#024b51",
    "#0780cf",
    "#765005",
]
class PlotGen:
    """
    Draw a CDF or a line plot of a dataset on a single matplotlib figure.

    The plot type and every option are read from the ``args`` mapping handed
    to the constructor.
    """

    def __init__(self, args: Dict[str, Any]):
        """
        Prepare plotting

        Args:
            args: plotting options. Keys read by this class:
                'plot', 'input_file', 'x', 'y', 'upbound', 'sep',
                'xcol' and 'ycol'. Only 'plot' and 'input_file' are
                mandatory; the others may be missing or None.
        """
        self.args = args
        self.fig, self.ax = plt.subplots(figsize=(8, 4))

    @staticmethod
    def _read_samples(path: str, upbound: Any = None) -> np.ndarray:
        """
        Load integer samples from a text file, one sample per line

        Args:
            path: file to read
            upbound: when not None, only samples strictly smaller than
                int(upbound) are kept

        Returns:
            the retained samples as an integer array
        """
        with open(path, 'r') as f:
            # Blank lines (e.g. a trailing newline) would make int() raise,
            # so they are skipped.
            samples = [int(line) for line in f if line.strip()]
        if upbound is not None:
            limit = int(upbound)
            samples = [s for s in samples if s < limit]
        return np.array(samples, dtype=int)

    def gen_cdf(self) -> None:
        """
        Generate CDF graph

        The input file must hold one integer sample per line. If no y label
        was supplied, it defaults to 'CDF'.
        """
        x = self._read_samples(self.args['input_file'],
                               self.args.get('upbound'))
        n_bins = 100
        # plot the cumulative histogram
        self.ax.hist(x, n_bins, density=True, histtype='step',
                     cumulative=True)
        # plot() reads the label from 'y', so the default has to go there.
        if self.args.get('y') is None:
            self.args['y'] = 'CDF'

    def gen_line(self) -> None:
        """
        Generate line plot from dataset

        Reads the input file as delimited text (delimiter 'sep', ',' when
        absent) and plots column 'ycol' against column 'xcol'. Exits with
        status -1 if either column name is missing.
        """
        xcol = self.args.get('xcol')
        ycol = self.args.get('ycol')
        if xcol is None or ycol is None:
            print("Axis column is not specified, "
                  "specify both '--xcol' and '--ycol'.",
                  file=sys.stderr)
            sys.exit(-1)
        df = pd.read_csv(self.args['input_file'],
                         sep=self.args.get('sep', ','))
        self.ax.plot(df[xcol], df[ycol])

    def plot(self) -> None:
        """
        Process plot

        Dispatches on args['plot'] ('cdf' or 'line'), applies the optional
        x-axis upper bound, grid and axis labels, then shows the figure.
        Exits with status -1 for an unsupported plot type.
        """
        kind = self.args['plot']
        if kind == "cdf":
            self.gen_cdf()
        elif kind == "line":
            self.gen_line()
        else:
            print(f"{kind} is not supported", file=sys.stderr)
            sys.exit(-1)

        upbound = self.args.get('upbound')
        if upbound is not None:
            self.ax.set_xlim(0, upbound)
        self.ax.grid(True)
        # Labels that were not provided are left untouched instead of
        # passing None to matplotlib.
        xlabel = self.args.get('x')
        if xlabel is not None:
            self.ax.set_xlabel(xlabel)
        ylabel = self.args.get('y')
        if ylabel is not None:
            self.ax.set_ylabel(ylabel)
        plt.show()
def main(argv=None) -> None:
    """
    Command-line entry point: parse options, check the input file, plot.

    Args:
        argv: argument list to parse; None (the default) makes argparse
            read the process command line.

    Exits with status -1 when the input dataset does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Generate plots from datasets.")
    parser.add_argument('-p', '--plot', type=str, required=True,
                        help="plot type",
                        metavar="[cdf|line]")
    parser.add_argument('-x', type=str, required=True,
                        help="label for x axis")
    parser.add_argument('-y', type=str, help="label for y axis")
    parser.add_argument('-u', '--upbound', type=int,
                        help="data samples greater than or equal to "
                             "upbound are ignored")
    parser.add_argument('-s', '--sep', type=str, default=',',
                        help="delimiter in dataset")
    parser.add_argument('--xcol', type=str, help="column as x axis")
    parser.add_argument('--ycol', type=str, help="column as y axis")
    parser.add_argument('-f', '--input-file', type=str, default='input.txt',
                        help="input dataset")
    # PlotGen expects a plain dict, so the Namespace is converted here.
    args = vars(parser.parse_args(argv))

    if not os.path.exists(args["input_file"]):
        print(f"Input dataset {args['input_file']} is not found.",
              file=sys.stderr)
        sys.exit(-1)

    plot_gen = PlotGen(args)
    plot_gen.plot()
# Run the CLI only when executed as a script, so importing this module has
# no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment