Browse Source

humanized logging

master
Sean Johnson 9 months ago
parent
commit
9177ab12c3
  1. 6
      resource/Dockerfile
  2. 1
      resource/requirements.txt
  3. 120
      resource/tools/backup-volumes.py

6
resource/Dockerfile

@@ -27,6 +27,10 @@ FROM ${ALPINE_REPO}:${ALPINE_VERSION}
COPY --from=build /go/bin/convoy /bin/convoy
ADD tools /tools
RUN apk add --no-cache bash jq lvm2-libs python3
COPY requirements.txt /
RUN apk add --no-cache bash curl jq lvm2-libs python3 && \
curl https://bootstrap.pypa.io/get-pip.py | python3 - && \
pip install -r /requirements.txt && \
rm /requirements.txt
ENTRYPOINT ["/bin/convoy"]

1
resource/requirements.txt

@@ -0,0 +1 @@
aiodocker>=0.19.1,>0.20.0

120
resource/tools/backup-volumes.py

@@ -23,46 +23,6 @@ KEEP_LAST_BACKUPS = int(os.getenv("KEEP_LAST_BACKUPS", 7))
KEEP_LAST_SNAPSHOTS = int(os.getenv("KEEP_LAST_SNAPSHOTS", 7))
ONLY_MATCHING = re.compile(os.getenv("ONLY_MATCHING", r"^.*$"))
MSG_STARTING = "convoy-volume-backup.loop.starting"
MSG_COMPLETED = "convoy-volume-backup.loop.iteration-completed"
MSG_ERROR = "convoy-volume-backup.loop.error"
MSG_CONVOY_CHECK = "convoy.checking"
MSG_CONVOY_OKAY = MSG_CONVOY_CHECK + ".success"
MSG_CONVOY_FAIL = MSG_CONVOY_CHECK + ".failed"
MSG_FETCHING_VOLUMES = "convoy.volumes.fetch"
MSG_FETCHING_VOLUMES_SUCCESS = MSG_FETCHING_VOLUMES + ".success"
MSG_FETCHING_VOLUMES_FAILED = MSG_FETCHING_VOLUMES + ".failed"
MSG_VOLUME_SNAPSHOT = "convoy.volume.create-snapshot"
MSG_VOLUME_SNAPSHOT_SUCCESS = MSG_VOLUME_SNAPSHOT + ".success"
MSG_VOLUME_SNAPSHOT_FAILED = MSG_VOLUME_SNAPSHOT + ".failed"
MSG_SNAPSHOT_BACKUP = "convoy.snapshot.create-backup"
MSG_SNAPSHOT_BACKUP_SUCCESS = MSG_SNAPSHOT_BACKUP + ".success"
MSG_SNAPSHOT_BACKUP_FAILED = MSG_SNAPSHOT_BACKUP + ".failed"
MSG_CLEAN_BACKUPS = "convoy.snapshot.clean-backups"
MSG_CLEAN_BACKUPS_SUCCESS = MSG_CLEAN_BACKUPS + ".success"
MSG_CLEAN_BACKUPS_FAILED = MSG_CLEAN_BACKUPS + ".failed"
MSG_SCAN_BACKUPS = MSG_CLEAN_BACKUPS + ".scanning"
MSG_SCAN_BACKUPS_SUCCESS = MSG_SCAN_BACKUPS + ".success"
MSG_SCAN_BACKUPS_FAILED = MSG_SCAN_BACKUPS + ".failed"
MSG_CLEAN_SNAPSHOTS = "convoy.volume.clean-snapshots"
MSG_CLEAN_SNAPSHOTS_SUCCESS = MSG_CLEAN_SNAPSHOTS + ".success"
MSG_CLEAN_SNAPSHOTS_FAILED = MSG_CLEAN_SNAPSHOTS + ".failed"
MSG_SCAN_SNAPSHOTS = MSG_CLEAN_SNAPSHOTS + ".scanning"
MSG_SCAN_SNAPSHOTS_SUCCESS = MSG_SCAN_SNAPSHOTS + ".success"
MSG_SCAN_SNAPSHOTS_FAILED = MSG_SCAN_SNAPSHOTS + ".failed"
MSG_BACKUP_TASK = "convoy-volume-backup.task"
MSG_BACKUP_TASK_SUCCESS = MSG_BACKUP_TASK + ".success"
MSG_BACKUP_TASK_FAILED = MSG_BACKUP_TASK + ".failed"
def timestamp_filename() -> str:
return datetime.now().strftime("%Y-%m-%dT%H-%M-%SZ%z")
@@ -90,7 +50,7 @@ def excepthook(etype: Type[T], value: T, tb: "traceback object"):
"traceback": "".join(traceback.format_exception(etype, value, tb)),
}
log_stdout(MSG_ERROR, **context)
log_stdout("An error occurred", **context)
def log_stdout(message, exception: Optional[Exception] = None, **context):
@@ -156,16 +116,15 @@ async def get_proc_response(
async def backup_all_volumes():
log_stdout(MSG_CONVOY_CHECK)
log_stdout("Checking on Convoy")
info = await get_proc_response(await convoy("info"))
log_stdout(MSG_CONVOY_OKAY, info=info)
log_stdout("Convoy appears okay", info=info)
log_stdout(MSG_FETCHING_VOLUMES)
volumes = await get_proc_response(
await convoy("list"), onfail_msg=MSG_FETCHING_VOLUMES_FAILED
await convoy("list"), onfail_msg="Encountered an error while fetching volumes",
)
log_stdout(MSG_FETCHING_VOLUMES_SUCCESS)
log_stdout("Successfully fetched volumes, starting backups")
# Filter out volumes that are not matching
volumes = list(filter(lambda i: ONLY_MATCHING.findall(i[0]), list(volumes.items())))
@@ -183,14 +142,14 @@ async def backup_all_volumes():
if (err := task.exception()) is not None:
frames = [traceback.format_stack(frame) for frame in task.get_stack()]
log_stdout(
MSG_BACKUP_TASK_FAILED,
"Backup task failed",
exception=err,
traceback=frames,
)
async def backup_volume(vol_name: str, vol_data: str):
log_stdout(MSG_BACKUP_TASK, vol_name=vol_name)
log_stdout(f"Starting backup for {vol_name}", vol_name=vol_name)
try:
# Create a new snapshot
@@ -204,45 +163,54 @@ async def backup_volume(vol_name: str, vol_data: str):
# Clean up old backups
await clean_older_backups(vol_name)
# Done!
log_stdout(f"Backup succeeded for {vol_name}", vol_name=vol_name)
return True
except Exception as err:
log_stdout(
MSG_BACKUP_TASK_FAILED,
f"Backup failed for {vol_name}",
vol_name=vol_name,
exception=err,
)
# Done!
log_stdout(MSG_BACKUP_TASK_SUCCESS, vol_name=vol_name)
return False
async def perform_volume_snapshot(vol_name: str, vol_data: dict):
log_stdout(MSG_VOLUME_SNAPSHOT, vol_name=vol_name)
log_stdout(f"Creating snapshot for volume {vol_name}", vol_name=vol_name)
# Make a new snapshot
snapshot_name = f"{vol_name}-{timestamp_filename()}"
await get_proc_response(
await convoy("snapshot", "create", vol_name, "--name", snapshot_name,),
json_decode=False,
onfail_msg=MSG_VOLUME_SNAPSHOT_FAILED,
onfail_msg=f"Snapshot for volume {vol_name} failed",
)
log_stdout(
MSG_VOLUME_SNAPSHOT_SUCCESS, vol_name=vol_name, snapshot_name=snapshot_name
f"Snapshot succeeded for volume {vol_name}",
vol_name=vol_name,
snapshot_name=snapshot_name,
)
return snapshot_name
async def perform_snapshot_backup(volume_name: str, snapshot_name: str):
log_stdout(MSG_SNAPSHOT_BACKUP, vol_name=volume_name, snapshot_name=snapshot_name)
log_stdout(
f"Creating backup of snapshot {snapshot_name} / volume {volume_name}",
vol_name=volume_name,
snapshot_name=snapshot_name,
)
backup_uri = await get_proc_response(
await convoy("backup", "create", "--dest", BACKUP_DESTINATION, snapshot_name,),
json_decode=False,
onfail_msg=MSG_SNAPSHOT_BACKUP_FAILED,
onfail_msg=f"Backup failed for snapshot {snapshot_name} / volume {volume_name}",
)
log_stdout(
MSG_SNAPSHOT_BACKUP_SUCCESS,
f"Backup succeeded for snapshot {snapshot_name} / volume {volume_name}",
vol_name=volume_name,
snapshot_name=snapshot_name,
backup_uri=backup_uri.decode("utf-8"),
@@ -250,19 +218,23 @@ async def perform_snapshot_backup(volume_name: str, snapshot_name: str):
return backup_uri
async def clean_older_backups(vol_name: dict):
async def clean_older_backups(vol_name: str):
if KEEP_LAST_BACKUPS == -1:
return
log_stdout(MSG_SCAN_BACKUPS, vol_name=vol_name)
log_stdout(f"Scanning volume {vol_name} for prunable backups", vol_name=vol_name)
backups = await get_proc_response(
await convoy("backup", "list", "--volume-name", vol_name, BACKUP_DESTINATION,),
onfail_msg=MSG_SCAN_BACKUPS_FAILED,
onfail_msg=f"Backup scan failed for volume {vol_name}",
)
backups = sorted(backups.values(), key=lambda d: d.get("CreatedTime"))
log_stdout(MSG_SCAN_BACKUPS_SUCCESS, vol_name=vol_name, total_count=len(backups))
log_stdout(
f"Backup scan succeeded for volume {vol_name}",
vol_name=vol_name,
total_count=len(backups),
)
if len(backups) > KEEP_LAST_BACKUPS:
remove_urls = list(
@@ -273,7 +245,7 @@ async def clean_older_backups(vol_name: dict):
)
log_stdout(
MSG_CLEAN_BACKUPS,
f"Cleaning {len(remove_urls)} backups for volume {vol_name}",
vol_name=vol_name,
remove_count=len(remove_urls),
total_count=len(backups),
@@ -284,10 +256,12 @@ async def clean_older_backups(vol_name: dict):
await get_proc_response(
await convoy("backup", "delete", backup_url,),
json_decode=False,
onfail_msg=MSG_CLEAN_BACKUPS_FAILED,
onfail_msg=f"Failed to delete backup {backup_url} for volume {vol_name}",
)
log_stdout(MSG_CLEAN_BACKUPS_SUCCESS, vol_name=vol_name)
log_stdout(
f"Done cleaning old backups for {vol_name}", vol_name=vol_name
)
async def clean_older_snapshots(vol_data: dict):
@@ -295,13 +269,13 @@ async def clean_older_snapshots(vol_data: dict):
return
vol_name = vol_data["Name"]
log_stdout(MSG_SCAN_SNAPSHOTS, vol_name=vol_name)
log_stdout(f"Scanning for old snapshots for volume {vol_name}", vol_name=vol_name)
snapshots = sorted(
vol_data["Snapshots"].values(), key=lambda d: d.get("CreatedTime")
)
log_stdout(
MSG_SCAN_SNAPSHOTS_SUCCESS, vol_name=vol_name, total_count=len(snapshots)
f"Snapshot scan succeeded for {vol_name}", vol_name=vol_name, total_count=len(snapshots)
)
if len(snapshots) > KEEP_LAST_SNAPSHOTS:
@@ -313,7 +287,7 @@ async def clean_older_snapshots(vol_data: dict):
)
log_stdout(
MSG_CLEAN_SNAPSHOTS,
f"Cleaning {len(remove_names)} old snapshots for volume {vol_name}",
vol_name=vol_name,
remove_count=len(remove_names),
total_count=len(snapshots),
@@ -324,24 +298,24 @@ async def clean_older_snapshots(vol_data: dict):
await get_proc_response(
await convoy("snapshot", "delete", snapshot_name,),
json_decode=False,
onfail_msg=MSG_CLEAN_SNAPSHOTS_FAILED,
onfail_msg=f"Failed to remove snapshot {snapshot_name} for volume {vol_name}",
)
log_stdout(MSG_CLEAN_SNAPSHOTS_SUCCESS, vol_name=vol_name)
log_stdout(f"Done cleaning snapshots for volume {vol_name}", vol_name=vol_name)
async def main():
sys.excepthook = excepthook
log_stdout(MSG_STARTING)
log_stdout(f"Starting Convoy backup tool!")
while True:
try:
await backup_all_volumes()
log_stdout(MSG_COMPLETED, next_iteration_in=BACKUP_INTERVAL)
log_stdout("Finished backup iteration!", next_iteration_in=BACKUP_INTERVAL)
await asyncio.sleep(BACKUP_INTERVAL)
except Exception as err:
log_stdout(MSG_ERROR, exception=str(err))
log_stdout("Error occurred during backup iteration", exception=str(err))
break

Loading…
Cancel
Save