Compare commits

...

8 Commits

Author SHA1 Message Date
a8d567a384
add changed data to output for command line processing
continuous-integration/drone/push: build is failing
2021-01-05 23:26:33 -08:00
df1837e577
move diagnostic output off stdout 2021-01-05 23:26:13 -08:00
201a79e58f
handle lack of etag 2021-01-05 23:25:39 -08:00
2e52dc0bac
move to scratch dockerfile (saves 100M) 2021-01-05 23:25:11 -08:00
f6ef3fe87a
add README 2021-01-05 23:24:23 -08:00
0af67168a9
removing the conditional - just forcing --pull/--no-cache all the time 2021-01-05 15:16:34 -08:00
76d0853864
buildx not necessary (see details below)
Without buildx you need to seriously watch your cache. Ultimately
the solution here still has a hole, because the FROM image(s) in
the Dockerfile will end up cached in whatever architecture
goes last. We are relying on the build server not caching things,
but be wary when doing local multi-architecture builds (see the
sketch after the commit list).
2021-01-05 15:15:15 -08:00
062ead81a8
add dockerignore 2021-01-05 15:11:49 -08:00
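
The cache caveat in 76d0853864 is what 0af67168a9 works around by always passing --pull/--no-cache. A minimal sketch of what that looks like for a local two-architecture build; the tags and platforms here are placeholders, not values taken from the Makefile:

```sh
# Hypothetical local multi-arch build. Without --pull/--no-cache, the second
# build can silently reuse the base-image layers pulled for the first, so the
# FROM layers end up being whatever architecture was pulled last.
docker build --pull --no-cache --platform linux/amd64 -t etags:amd64 .
docker build --pull --no-cache --platform linux/arm64 -t etags:arm64 .
```
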
5 changed files with 132 additions and 20 deletions

.dockerignore Normal file (+4 lines)

@@ -0,0 +1,4 @@
.git
include/
lib/
*.zip

Dockerfile

@@ -1,9 +1,76 @@
FROM python:3.8.7-alpine3.12
FROM python:3.9-slim-buster AS builder
# staticx has two issues:
# 1. It does not seem to play well with alpine (at least for Python+PIE).
# In that configuration, it seems to think it's a glibc executable
# 2. It does not play well with PIE executables, see
# https://github.com/JonathonReinhart/staticx/issues/71
WORKDIR /app
ENTRYPOINT ["etags.py"]
RUN true \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
build-essential=12.6 \
patchelf=0.9* \
&& pip3 install pyinstaller==4.1 \
scons==4.0.1 \
patchelf-wrapper==1.2.0 \
staticx==0.12.0 \
&& rm -rf /var/lib/apt/lists/*
COPY requirements.txt /app/
COPY etags.py /app/
ARG PYINSTALLER_TAG=v4.1
RUN pip3 install -r requirements.txt && rm /app/requirements.txt
# HACK to get around https://github.com/JonathonReinhart/staticx/issues/71
RUN true \
&& apt-get update \
&& apt-get install --no-install-recommends -y \
git=1:2.20* \
zlib1g-dev=1:1.2.11* \
&& git clone --depth 1 --single-branch --branch ${PYINSTALLER_TAG} \
https://github.com/pyinstaller/pyinstaller.git /tmp/pyinstaller \
&& cd /tmp/pyinstaller/bootloader \
&& CC="gcc -no-pie" python ./waf configure --no-lsb all \
&& cp -R /tmp/pyinstaller/PyInstaller/bootloader/* \
/usr/local/lib/python*/site-packages/PyInstaller/bootloader/ \
&& rm -rf /var/lib/apt/lists/*
# # ENTRYPOINT ["etags.py"]
#
COPY requirements.txt /src/
COPY etags.py /src/
WORKDIR /src
RUN true \
&& pip3 install -r requirements.txt \
&& pyinstaller -F etags.py \
&& staticx \
--strip \
--no-compress \
-l /lib/x86_64-linux-gnu/libgcc_s.so.1 \
dist/etags dist/app \
&& chmod 755 dist/app
FROM scratch
# Allow ssl comms
COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
# So we can set the user
COPY --from=builder /etc/passwd /etc/passwd
COPY --from=builder /etc/group /etc/group
# This should need no privileges
USER nobody:nogroup
# Environment variables that should be set
ENV AWS_DEFAULT_REGION=us-west-2
ENV AWS_ACCESS_KEY_ID=AKIAEXAMPLE
ENV AWS_SECRET_ACCESS_KEY=dummy
# Set if you're not talking to real DDB
# ENV DDB_ENDPOINT
ENV ETAGS_TABLE=etags
# Setting this variable to nothing will turn off bus notification
ENV ETAGS_BUS_NAME=
ENTRYPOINT ["/app"]
COPY --from=builder /src/dist/app /app
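
A quick way to see the size win from the scratch-based final stage (commit 2e52dc0bac, "saves 100M") is to build and list the image locally. This sketch assumes the Dockerfile above sits at the repository root and uses an illustrative tag; the Makefile below actually builds from generated .dockerfile-* copies:

```sh
# Build the two-stage image and show its final size.
# "etags:local" is only an example tag, not one the Makefile uses.
docker build -t etags:local .
docker image ls etags:local
```
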

Makefile

@@ -110,22 +110,15 @@ $(CONTAINER_DOTFILES):
@export DOCKER_CLI_EXPERIMENTAL=enabled && \
if $(DKR) --version | grep -q podman; then \
[ "$(HOSTARCH)" != "$(ARCH)" ] && \
echo "Podman build on different arch not tested: probably broken" && \
echo "Podman build on different arch is likely broken" && \
echo "See: https://github.com/containers/buildah/issues/1590"; \
$(DKR) build --platform $(OS)/$(ARCH) -t $(REGISTRY)/$(BIN):$(TAG) \
-f .dockerfile-$(BIN)-$(OS)_$(ARCH) .; \
else \
if [ "$(HOSTARCH)" = "$(ARCH)" ]; then \
$(DKR) build -t $(REGISTRY)/$(BIN):$(TAG) \
-f .dockerfile-$(BIN)-$(OS)_$(ARCH) .; \
else \
echo "See https://medium.com/@artur.klauser/building-multi-architecture-docker-images-with-buildx-27d80f7e2408"; \
echo "for host qemu setup if the buildx command fails"; \
$(DKR) buildx build --load \
-t $(REGISTRY)/$(BIN):$(TAG) -f .dockerfile-$(BIN)-$(OS)_$(ARCH) \
--platform $(OS)/$(ARCH) \
. ; \
fi; \
$(DKR) build --pull --no-cache \
-t $(REGISTRY)/$(BIN):$(TAG) -f .dockerfile-$(BIN)-$(OS)_$(ARCH) \
--platform $(OS)/$(ARCH) \
. ; \
fi
@$(DKR) images -q $(REGISTRY)/$(BIN):$(TAG) > $@
@echo
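
Given the cross-architecture caching caveats above, a cheap post-build sanity check is to confirm what architecture actually got built. The image reference below is only a placeholder for $(REGISTRY)/$(BIN):$(TAG):

```sh
# Print the OS/architecture recorded in the image metadata; it should match
# the --platform that was requested.
docker image inspect --format '{{.Os}}/{{.Architecture}}' r.lerch.org/etags:latest
```
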

README.md Normal file (+43 lines)

@@ -0,0 +1,43 @@
# etags
Checks URLs for etags and reports back any changed URLs. Requires [DynamoDB](https://aws.amazon.com/dynamodb/),
or something like it. For a lightweight, self-hosted version, you can try
[DynamoDb Bolt](https://github.com/elerch/ddbbolt).
Usage: `etags.py <url> ...`
## Environment variables
* ETAGS_BUS_NAME: If set, this will notify on an [EventBridge bus](https://aws.amazon.com/eventbridge/)
* ETAGS_TABLE: Table name for DynamoDB
* DDB_ENDPOINT: By default, the application will use DynamoDB's standard endpoint.
Set this variable if using a non-standard endpoint or DynamoDb Bolt.
This uses boto3, so all [AWS Environment Variables](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-envvars.html)
will control authentication. If using DynamoDb Bolt, the following must be set,
but can be dummy values:
* AWS_DEFAULT_REGION
* AWS_ACCESS_KEY_ID
* AWS_SECRET_ACCESS_KEY
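
For example, a direct (non-Docker) run against DynamoDb Bolt might look like the following; the endpoint and URLs are placeholders:

```sh
# Example only: endpoint and URLs are placeholders.
export AWS_DEFAULT_REGION=us-west-2
export AWS_ACCESS_KEY_ID=AKIAEXAMPLE
export AWS_SECRET_ACCESS_KEY=dummy
export DDB_ENDPOINT=http://localhost:8080
export ETAGS_TABLE=etags
./etags.py https://example.com/feed.xml https://example.org/page.html
```
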
## Running as Docker
The latest version can be found at [https://r.lerch.org/repo/etags/tags/](https://r.lerch.org/repo/etags/tags/).
Versions are tagged with the short hash of the git commit, and are
built as a multi-architecture image based on a scratch image.
You can run the Docker image with a command like:
```sh
docker run \
--rm \
--tmpfs /tmp \
--name=ddbbolt \
-e AWS_DEFAULT_REGION=us-west-2 \
-e AWS_ACCESS_KEY_ID=AKIAEXAMPLE \
-e AWS_SECRET_ACCESS_KEY=dummy \
-e DDB_ENDPOINT=set_if_applicable \
-e ETAGS_TABLE=etags \
r.lerch.org/etags:0af6716
```

etags.py

@@ -114,6 +114,10 @@ def lambda_handler(event, context):
    for future in as_completed(rs):
        try:
            result = future.result()
            if 'etag' not in result.headers:
                printerr('WARNING: Will not process, no etag found for %s' %
                         rsdict[future]['url'])
                break
            current_etag = result.headers['etag']
            prior_etag = None
            if rsdict[future]['url'] in existing_etags:
@@ -129,12 +133,13 @@
            traceback.print_exc()
    if len(changed) > 0:
        print('changes detected')
        printerr('changes detected')
        process_changes(changed)
    return {
        'statusCode': 200,
        'body': json.dumps(event)
        'body': json.dumps(event),
        'changed': changed,
    }
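
The new guard above covers servers that do not send an ETag at all, which is the case the "handle lack of etag" commit addresses. A quick manual check of whether a given URL exposes one (the URL is a placeholder):

```sh
# HEAD request; prints the ETag header only if the server sends one.
curl -sI https://example.com | grep -i '^etag:'
```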