Initial Commit

This commit is contained in:
2024-12-24 18:02:35 +01:00
commit 79438ff053
9 changed files with 5092 additions and 0 deletions

22
.gitignore vendored Normal file
View File

@@ -0,0 +1,22 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info
# Virtual environments
.venv
### JupyterNotebooks ###
# gitignore template for Jupyter Notebooks
# website: http://jupyter.org/
.ipynb_checkpoints
*/.ipynb_checkpoints/*
# IPython
profile_default/
ipython_config.py

1
.python-version Normal file
View File

@@ -0,0 +1 @@
3.12

2356
Process2.ipynb Normal file

File diff suppressed because one or more lines are too long

21
README.md Normal file
View File

@@ -0,0 +1,21 @@
# WorkerLogs Analysis
### How to launch Jupyter in Nu using Pueue:
```
task spawn {uv run jupyter lab}
```
### Data Pipeline:
0. Run `uv sync` to create the virtual environment and install the project dependencies
1. Run `parse1.py` on the raw log text file to convert it to CSV
```
uv run parse1.py testdata.txt test_out.csv
```
2. Open `Process2.ipynb` in JupyterLab and run the cells to compute the statistics
```
uv run jupyter lab
```
### Other info:
- `used_fetch_command.sh` is the command I ran on the production Docker setup to obtain the data
- `preprocess.nu` is an attempt at doing the parsing with Nushell

39
parse1.py Normal file
View File

@@ -0,0 +1,39 @@
# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///
import re
import sys
parser = re.compile(r'celery_worker-(?P<workerid>\d+?)\s+\|\s+\[(?P<date>.*?) (?P<time>.*?): (?P<tag>.*?)\] Task (?P<taskname>.*?)\[(?P<taskid>.*?)\] (?P<msg>.*?)$')
def main() -> None:
path_in = sys.argv[1] if len(sys.argv) > 1 else 'testdata.txt'
path_out = sys.argv[2] if len(sys.argv) > 2 else 'out.txt'
with open(path_in, 'r') as fin:
with open(path_out, 'w') as fout:
fout.write("workerid,datetime,tag,taskname,taskid,msg,msgtime\n")
while True:
line = fin.readline()
if line == "":
break
if not "Task" in line:
continue
m = parser.search(line)
if m is None:
continue
d = m.groupdict()
msgsplit = d["msg"].split(' ')
msg = msgsplit[0]
if msg == 'succeeded':
msgtime = msgsplit[2][:-1][:7]
else:
msgtime= ""
out = f'{d["workerid"]},{d["date"]}T{d["time"].replace(",",".")}Z,{d["tag"]},{d["taskname"]},{d["taskid"]},{msg},{msgtime}\n'
fout.write(out)
pass
pass
if __name__ == "__main__":
main()

2
preprocess.nu Normal file
View File

@@ -0,0 +1,2 @@
#!/bin/nu
# Parses task lines from testdata.txt into a table using a single pipeline:
#   1. keep only lines containing "Task" (via the external `grep`);
#   2. split each line into the columns workerid, date, time, tag, taskname,
#      taskid and msg with a regex;
#   3. add a `msgtime` column holding the number that precedes an "s" in msg
#      (`get -i` tolerates rows where that inner parse yields no match);
#   4. shrink `msg` to its first word.
# NOTE(review): unlike parse1.py this keeps date and time as separate columns
# and does not write a CSV file - the table is only the pipeline's output.
open testdata.txt | grep Task | lines | parse -r 'celery_worker-(?<workerid>\d+?)\s+\|\s+\[(?<date>.*?) (?<time>.*?): (?<tag>.*?)\] Task (?<taskname>.*?)\[(?<taskid>.*?)\] (?<msg>.*?)\z' | insert msgtime {|row| $row.msg | parse -r '[a-zA-Z ]+(?<t>[\d.]*)s.*' | get -i 0.t} | update msg {|row| $row.msg | split words | get 0}

20
pyproject.toml Normal file
View File

@@ -0,0 +1,20 @@
[project]
name = "workerlogs-analysis"
version = "0.1.0"
description = "Parse Celery worker logs into CSV and analyse task statistics in Jupyter"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"hvplot>=0.11.2",
"plotly>=5.24.1",
"polars[all]>=1.17.1",
"vegafusion[embed]>=1.5.0",
"vl-convert-python>=1.6.0",
]
[dependency-groups]
dev = [
"anywidget>=0.9.13",
"jupyter-bokeh>=4.0.5",
"jupyterlab>=4.3.4",
]

1
used_fetch_command.sh Normal file
View File

@@ -0,0 +1 @@
# Dump the logs of the `celery_worker` compose service for the 24 hours of
# 2024-12-16 (timestamps given at +01:00) into workerlogs_2024_12_16.txt.
docker compose logs --since="2024-12-16T00:00:00+01:00" --until="2024-12-17T00:00:00+01:00" celery_worker > workerlogs_2024_12_16.txt

2630
uv.lock generated Normal file

File diff suppressed because it is too large Load Diff