Skip to content

Instantly share code, notes, and snippets.

@cinek810
Last active December 3, 2019 16:28
Show Gist options
  • Select an option

  • Save cinek810/9e893aa103f70fcb40ac1d71af9fc988 to your computer and use it in GitHub Desktop.

Select an option

Save cinek810/9e893aa103f70fcb40ac1d71af9fc988 to your computer and use it in GitHub Desktop.

Revisions

  1. cinek810 revised this gist Dec 3, 2019. 1 changed file with 0 additions and 2 deletions.
    2 changes: 0 additions & 2 deletions slurm_commands.py
    Original file line number Diff line number Diff line change
    @@ -38,11 +38,9 @@ def __init__(self):
    super(squeue, self).__init__("squeue", gdb.COMMAND_DATA)

    def invoke(self, arg, from_tty):
    print("xxxxxx");
    self._print_all_jobs()

    def _print_all_jobs(self):
    print("yy")

    struct_job_record_p = gdb.lookup_type("struct job_record").pointer()

  2. cinek810 created this gist Dec 3, 2019.
    67 changes: 67 additions & 0 deletions slurm_commands.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,67 @@
    import gdb


    # Map every numeric slurm_msg_type_t enumerator value to its symbolic
    # name, so RPC ids read from the debugged program can be shown by name.
    slurm_msg_type = gdb.lookup_type("slurm_msg_type_t")

    # Built with a comprehension so that no loop temporaries are left behind
    # as module-level globals once the script has been loaded.
    rpc_num2str = {
        field.enumval: name
        for name, field in slurm_msg_type.items()
    }

    class sdiag(gdb.Command):
        """Print Slurm RPC statistics, grouped by message type and by user.

        Usage: sdiag

        Reads the rpc_type_* and rpc_user_* arrays (size, id, cnt, time) of
        the program being debugged and prints one line per used slot.
        """

        def __init__(self):
            super(sdiag, self).__init__("sdiag", gdb.COMMAND_DATA)

        def invoke(self, arg, from_tty):
            """Entry point called by gdb; ``arg`` and ``from_tty`` are unused."""
            self._print_rpc_stats()

        def _print_rpc_stats(self):
            """Print the per-type table followed by the per-user table."""
            self._print_rpc_by("type")
            self._print_rpc_by("user")

        def _print_rpc_by(self, by_what):
            """Print one statistics table.

            ``by_what`` selects the symbol family: the values are read from
            ``rpc_<by_what>_size`` and the parallel arrays
            ``rpc_<by_what>_id``, ``rpc_<by_what>_cnt`` and
            ``rpc_<by_what>_time``.
            """
            prefix = "rpc_" + by_what
            print("RPC BY " + by_what)

            # Convert explicitly so range() gets a plain Python int.
            slot_count = int(gdb.parse_and_eval(prefix + "_size"))
            for slot in range(slot_count):
                rpc_id = gdb.parse_and_eval("%s_id[%d]" % (prefix, slot))
                rpc_cnt = gdb.parse_and_eval("%s_cnt[%d]" % (prefix, slot))
                rpc_time = gdb.parse_and_eval("%s_time[%d]" % (prefix, slot))

                # The first slot with a zero count ends the table; this also
                # guarantees the division below never divides by zero.
                # NOTE(review): presumably slots are filled front to back, so
                # a zero count marks the unused tail - confirm against Slurm.
                if rpc_cnt == 0:
                    break

                if by_what == "type":
                    # An id missing from the enum (e.g. script and binary from
                    # different Slurm versions) must not abort the listing.
                    rpc_name = rpc_num2str.get(int(rpc_id), "UNKNOWN")
                    print("%s \t (%s) \t count: %d \t ave_time:%d \t time:%d"
                          % (rpc_name, rpc_id, rpc_cnt,
                             rpc_time / rpc_cnt, rpc_time))
                else:
                    print("%s \t count: %d \t ave_time:%d \t time:%d"
                          % (rpc_id, rpc_cnt, rpc_time / rpc_cnt, rpc_time))

    class squeue(gdb.Command):
        """List the jobs found on job_list in the program being debugged.

        Usage: squeue

        Prints the number of list entries, then one line per job in the form
        job_id|partition|user_name|name|nodes|state_desc.
        """

        def __init__(self):
            super(squeue, self).__init__("squeue", gdb.COMMAND_DATA)

        def invoke(self, arg, from_tty):
            """Entry point called by gdb; ``arg`` and ``from_tty`` are unused."""
            self._print_all_jobs()

        def _print_all_jobs(self):
            """Walk ``job_list`` from its head and print every job record."""
            job_record_ptr = gdb.lookup_type("struct job_record").pointer()

            node = gdb.parse_and_eval("job_list->head")
            job_count = int(gdb.parse_and_eval("job_list->count"))
            print("Job list has %d items" % job_count)

            for _ in range(job_count):
                # Stop instead of dereferencing NULL if the chain is shorter
                # than the recorded count (e.g. an inconsistent core dump).
                if int(node) == 0:
                    break
                # Each node's "data" member is cast to a job_record pointer
                # so the record's fields can be read by name.
                self._print_job(node["data"].cast(job_record_ptr))
                node = node["next"]

        def _print_job(self, job_rec):
            """Print one job record as a single '|'-separated line."""
            print("%d|%s|%s|%s|%s|%s" % (
                job_rec["job_id"],
                job_rec["partition"],
                job_rec["user_name"],
                job_rec["name"],
                job_rec["nodes"],
                job_rec["state_desc"],
            ))






    # Register commands: each constructor calls gdb.Command.__init__ with the
    # command name, which is what makes "sdiag" and "squeue" available at the
    # gdb prompt, so the created instances do not need to be stored.
    sdiag()
    squeue()