diff options
author | zeldakatze <coffee@zeldakatze.de> | 2025-10-08 10:23:53 +0200 |
---|---|---|
committer | zeldakatze <coffee@zeldakatze.de> | 2025-10-08 10:23:53 +0200 |
commit | efbe8e78a32e14e791f764ae80397c5d5b13f3b9 (patch) | |
tree | d345547ac0fdad306b9c1fd9af194afad54fbb40 | |
parent | d0c9031ef9cbe6ea8e92a5f8d2114fad524864f4 (diff) | |
download | website-efbe8e78a32e14e791f764ae80397c5d5b13f3b9.tar.gz website-efbe8e78a32e14e791f764ae80397c5d5b13f3b9.zip |
-rw-r--r-- | in/index.html | 1 | ||||
-rw-r--r-- | in/smallunix.html | 2 | ||||
-rw-r--r-- | in/tinkering.html | 3 | ||||
-rwxr-xr-x | qdsg.sh | 51 | ||||
-rw-r--r-- | robots.txt | 67 |
5 files changed, 120 insertions, 4 deletions
diff --git a/in/index.html b/in/index.html index cc8de7a..c3ea0b5 100644 --- a/in/index.html +++ b/in/index.html @@ -1,3 +1,4 @@ +TITLE: Index <h2>Über mich</h2> <p> Hallo, ich bin Maite (sie/ihr). Ich bastel gerne mit Technik rum, mache diff --git a/in/smallunix.html b/in/smallunix.html index 5f9167a..475226b 100644 --- a/in/smallunix.html +++ b/in/smallunix.html @@ -43,7 +43,7 @@ haben.</p> <h2>Quellcode</h2> <p>Der Quellcode kann auf meiner cgit-Instanz -(<a href='https://cgit.zeldakatze.de/smallUnix/'>Hier</a>) gefunden werden. +(<a href="https://cgit.zeldakatze.de/smallUnix/">Hier</a>) gefunden werden. Zum Kompilieren gibt es ein eigenes Toolchain, das zunächst kompiliert werden muss. Anschließend kann das System recht leicht mit ./build.sh kompiliert werden </p> diff --git a/in/tinkering.html b/in/tinkering.html index 567e514..5bc38bb 100644 --- a/in/tinkering.html +++ b/in/tinkering.html @@ -1,8 +1,9 @@ +TITLE: Basteleien Ich habe derzeit einige Projekte an denen ich Arbeite, einige davon sind hier aufgezählt. -<a href='smallunix.html'><h2>smallUnix</h2></a> +<a href="smallunix.html"><h2>smallUnix</h2></a> Ein sich Unix-Ähnlich anfühlendes System, das nicht-kontinuierliche Adressräume verwendet. @@ -1,14 +1,61 @@ #!/bin/bash +run_ffmpeg_if_not_exist() { + local vid_input=$1 + local out_basename=$2 + local vid_codec=$3 + + ffmpeg -i "$vid_input" -c:v "$vid_codec" +} + +# generates multiple resolutions for the given video files.
+# $1: file name +# @return +encode_video() { + # name the input + local vid_input=$1 + local vid + + +} + +# ensure that the necessary directories exist mkdir -p out/ +mkdir -p out/video/mp4/ + +# copy robots.txt +echo "Copying robots.txt" +cp robots.txt out/ +# process every html site for inFile in in/*.html; do [ -e "$inFile" ] || continue echo "Processing file $inFile" filename=$(basename "$inFile") outFile="out/$filename" - cat header.html > "$outFile" - cat "$inFile" >> "$outFile" + + # get the title of the file if it is noted in the file + site_title="" + firstline=$(cat $inFile | head -n1) + [[ $firstline == TITLE:* ]] && { + site_title="$(echo $firstline | sed 's/TITLE: //g') - " + } + + # assemble the page + cat header.html | sed "s/%SITE_TITLE%/$site_title/g" > "$outFile" + if [[ $firstline == TITLE:* ]]; then + cat "$inFile" | tail -n +2 >> "$outFile" + else + echo -en "\t" && echo "$filename does not seem to have a title!" + cat "$inFile" >> "$outFile" + fi + cat footer.html >> "$outFile" done + +for inFile in videos/*; do + sleep 0 +done + + diff --git a/robots.txt b/robots.txt new file mode 100644 index 0000000..eab12f6 --- /dev/null +++ b/robots.txt @@ -0,0 +1,67 @@ +# Block all known AI crawlers and assistants +# from using content for training AI models.
+# Source: https://robotstxt.com/ai +User-Agent: GPTBot +User-Agent: ClaudeBot +User-Agent: Claude-User +User-Agent: Claude-SearchBot +User-Agent: CCBot +User-Agent: Google-Extended +User-Agent: Applebot-Extended +User-Agent: Facebookbot +User-Agent: Meta-ExternalAgent +User-Agent: Meta-ExternalFetcher +User-Agent: diffbot +User-Agent: PerplexityBot +User-Agent: Perplexity-User +User-Agent: Omgili +User-Agent: Omgilibot +User-Agent: webzio-extended +User-Agent: ImagesiftBot +User-Agent: Bytespider +User-Agent: TikTokSpider +User-Agent: Amazonbot +User-Agent: Youbot +User-Agent: SemrushBot-OCOB +User-Agent: Petalbot +User-Agent: VelenPublicWebCrawler +User-Agent: TurnitinBot +User-Agent: Timpibot +User-Agent: OAI-SearchBot +User-Agent: ICC-Crawler +User-Agent: AI2Bot +User-Agent: AI2Bot-Dolma +User-Agent: DataForSeoBot +User-Agent: AwarioBot +User-Agent: AwarioSmartBot +User-Agent: AwarioRssBot +User-Agent: Google-CloudVertexBot +User-Agent: PanguBot +User-Agent: Kangaroo Bot +User-Agent: Sentibot +User-Agent: img2dataset +User-Agent: Meltwater +User-Agent: Seekr +User-Agent: peer39_crawler +User-Agent: cohere-ai +User-Agent: cohere-training-data-crawler +User-Agent: DuckAssistBot +User-Agent: Scrapy +User-Agent: Cotoyogi +User-Agent: aiHitBot +User-Agent: Factset_spyderbot +User-Agent: FirecrawlAgent + +Disallow: / +DisallowAITraining: / + +# Block any non-specified AI crawlers (e.g., new +# or unknown bots) from using content for training +# AI models, while allowing the website to be +# indexed and accessed by bots. These directives
# are still experimental and may not be supported +# by all AI crawlers. +User-Agent: * +DisallowAITraining: / +Content-Usage: ai=n +Allow: / |