136 lines
5.1 KiB
Markdown
136 lines
5.1 KiB
Markdown
|
|
# Static site generator in 90 lines of Python code
|
|||
|
|
|
|||
|
|
Created: Apr 16, 2025
|
|||
|
|
|
|||
|
|
[На русском](/ru/blog/2025/01-sitegen)
|
|||
|
|
|
|||
|
|
A long time ago, I made a small business card website. It consisted of three
|
|||
|
|
simple HTML pages, one CSS file (which I generated from SCSS), several fonts
|
|||
|
|
and images. That was more than enough to get a link to my website featured
|
|||
|
|
in a resume or social media profile.
|
|||
|
|
|
|||
|
|
|
|||
|
|

|
|||
|
|
|
|||
|
|
I recently decided to continue working <a href="https://github.com/blankhex/bhlib" target="_blank">on my pet-project</a>
|
|||
|
|
and would like to publish all sorts of notes and articles on this topic on my
|
|||
|
|
website. I didn't want to manually mess with HTML files, so I decided to look
|
|||
|
|
for an alternative in the form of some kind of static website generator.
|
|||
|
|
Ideally, I would like it to be:
|
|||
|
|
|
|||
|
|
- Small and simple
|
|||
|
|
- Able to work with Markdown
|
|||
|
|
- Able to syntax-highlight blocks of code
|
|||
|
|
|
|||
|
|
Unfortunately, I couldn't find any suitable solutions for myself, so I decided
|
|||
|
|
to build my own using Python, <a href="https://mistune.lepture.com/en/latest/" target="_blank">mistune</a>
|
|||
|
|
Markdown parser, <a href="https://jinja.palletsprojects.com/en/stable/" target="_blank">Jinja2</a>
|
|||
|
|
template engine, and <a href="https://pygments.org" target="_blank">Pygments</a>.
|
|||
|
|
The whole generation process boils down to the following:
|
|||
|
|
|
|||
|
|
1. For every file in the input directory check whether it is Markdown
|
|||
|
|
- If yes - convert it to HTML (with highlighting) and write to output directory
|
|||
|
|
- If no - copy as is to output directory
|
|||
|
|
2. Compress content of the output directory
|
|||
|
|
|
|||
|
|
This generation process has one major drawback: because
|
|||
|
|
there is no post-processing of HTML, any links to other Markdown pages must
|
|||
|
|
end with a `.html` extension[^1].
|
|||
|
|
|
|||
|
|
[^1]: This can be mitigated by a special web-server configuration that replaces
|
|||
|
|
`.md` extension with `.html` or by omitting `.md` extension entirely and
|
|||
|
|
using something like `try_files $uri $uri.html`
|
|||
|
|
|
|||
|
|
Here is the code:
|
|||
|
|
|
|||
|
|
```python
|
|||
|
|
import re, jinja2, mistune, shutil, os, pathlib, tarfile
|
|||
|
|
from pygments.lexers import get_lexer_by_name
|
|||
|
|
from pygments.formatters import HtmlFormatter
|
|||
|
|
from pygments import highlight
|
|||
|
|
|
|||
|
|
|
|||
|
|
class PygmentsHTMLRenderer(mistune.HTMLRenderer):
    """Mistune HTML renderer that highlights code blocks with Pygments."""

    @staticmethod
    def _plain_block(code: str):
        """Return ``code`` HTML-escaped inside a plain ``<pre><code>`` element."""
        return '\n<pre><code>%s</code></pre>\n' % mistune.escape(code)

    def block_code(self, code: str, info = None):
        """Render one code block as HTML.

        Args:
            code: Raw text of the code block.
            info: Info string that follows the opening fence (for example
                ``python``); ``None`` or empty when the block has none.

        Returns:
            Pygments-highlighted HTML when the first word of ``info`` names
            a lexer known to Pygments, otherwise the escaped code wrapped in
            a plain ``<pre><code>`` element.
        """
        # Local import: keeps this block self-contained instead of relying
        # on an extra file-level import.
        from pygments.util import ClassNotFound

        # Only the first word is the language; anything after it would make
        # the lexer lookup fail.
        words = info.split() if info else []
        if not words:
            return self._plain_block(code)
        try:
            lexer = get_lexer_by_name(words[0], stripall=True)
        except ClassNotFound:
            # An unknown language tag should not abort the whole site build;
            # render the block without highlighting instead.
            return self._plain_block(code)
        formatter = HtmlFormatter(lineseparator='<br>')
        return highlight(code, lexer, formatter)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def convert_markdown(page: str):
    """Render Markdown source text to HTML.

    Args:
        page: Markdown document as a string.

    Returns:
        The result of running ``page`` through a mistune parser that uses
        ``PygmentsHTMLRenderer`` and the footnotes, table, strikethrough and
        url plugins.
    """
    # escape=False is handed to the renderer as-is; presumably this keeps raw
    # HTML written in the Markdown source intact (confirm against mistune docs).
    html_renderer = PygmentsHTMLRenderer(escape=False)
    to_html = mistune.create_markdown(
        plugins=['footnotes', 'table', 'strikethrough', 'url'],
        renderer=html_renderer,
    )
    return to_html(page)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def extract_title(page: str):
    """Return the contents of the first ``<h1>`` element found in ``page``.

    Args:
        page: HTML text to inspect.

    Returns:
        The text between the first ``<h1 ...>`` and the next ``</h1>`` on the
        same line, or ``'BlankHex'`` when no such heading exists.
    """
    # re.search (not re.match) so the heading is found even when other
    # content precedes it; the optional group tolerates attributes such as
    # <h1 id="...">.
    found = re.search(r'<h1(?:\s[^>]*)?>(.*?)</h1>', page)
    if found:
        return found.group(1)
    return 'BlankHex'
|
|||
|
|
|
|||
|
|
|
|||
|
|
def handle_file(path: str, input_dir: str, output_dir: str, template_name: str):
    """Convert one Markdown file to an HTML page, or copy any other file.

    The output location mirrors the position of ``path`` relative to
    ``input_dir`` inside ``output_dir``. Files whose name ends with ``.md``
    are rendered through the Jinja2 template and written with a ``.html``
    extension; everything else is copied unchanged. Nothing is done when the
    output path already exists.

    Args:
        path: Path of the file to process.
        input_dir: Root directory that ``path`` is made relative to.
        output_dir: Root directory that receives the result.
        template_name: Name of the template, looked up in ``template/``.
    """
    # Calculate input and output paths
    relpath = os.path.relpath(path, input_dir)
    input_path = path
    output_path = os.path.join(output_dir, relpath)
    is_markdown = input_path.endswith('.md')
    if is_markdown and output_path.endswith('.md'):
        # Swap only the trailing extension: str.replace would also rewrite
        # any '.md' occurring in a directory name or earlier in the file name.
        output_path = output_path[:-len('.md')] + '.html'

    # Don't convert if output path exists
    if os.path.exists(output_path):
        return

    # Run conversion
    pathlib.Path(os.path.dirname(output_path)).mkdir(parents=True, exist_ok=True)
    if not is_markdown:
        # Copy file as is
        shutil.copy(path, output_path)
        return

    # Read Markdown document (explicit UTF-8 so the result does not depend
    # on the platform's locale encoding)
    with open(input_path, 'r', encoding='utf-8') as handle:
        markdown_page = handle.read()

    # Get Pygments styles for light and dark themes
    light_style = HtmlFormatter(style='default').get_style_defs()
    dark_style = HtmlFormatter(style='monokai').get_style_defs()

    # Convert Markdown document to HTML document
    html_page = convert_markdown(markdown_page)
    html_header = extract_title(html_page)
    environment = jinja2.Environment(loader=jinja2.FileSystemLoader('template/'))
    template = environment.get_template(template_name)
    output_page = template.render(title=html_header,
                                  body=html_page,
                                  light_style=light_style,
                                  dark_style=dark_style)

    # Write HTML document
    with open(output_path, 'w', encoding='utf-8') as handle:
        handle.write(output_page)
|
|||
|
|
|
|||
|
|
|
|||
|
|
def convert_dir(input_dir: str, output_dir: str, template_name: str):
    """Run ``handle_file`` on every file found under ``input_dir``.

    Args:
        input_dir: Directory tree to walk.
        output_dir: Directory passed through to ``handle_file`` as the
            output root.
        template_name: Template name passed through to ``handle_file``.
    """
    for current_dir, _subdirs, filenames in os.walk(input_dir):
        for filename in filenames:
            source = os.path.join(current_dir, filename)
            handle_file(source, input_dir, output_dir, template_name)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Start from a clean slate: drop whatever a previous run left behind
if os.path.isdir('public'):
    shutil.rmtree('public')
if os.path.isfile('public.tgz'):
    os.remove('public.tgz')

# Build the site into 'public', then pack it into a gzip-compressed tarball
convert_dir('content', 'public', 'template.html')
with tarfile.open('public.tgz', 'w:gz') as archive:
    for entry in os.listdir('public'):
        # Store each entry under its own name, without the 'public/' prefix
        archive.add(os.path.join('public', entry), arcname=entry)
|
|||
|
|
```
|
|||
|
|
|