Change the whole Markdown parsing functionality

This commit is contained in:
2024-10-07 15:35:22 +02:00
parent fb6ca6c245
commit 11cc9f6d0a
21 changed files with 311 additions and 264 deletions

View File

@ -1,20 +0,0 @@
use chrono::{DateTime, Utc};
use tracing::debug;
/// Askama filter that formats a UTC timestamp as `day month-name year`,
/// e.g. `7 October 2024`.
///
/// Takes no extra filter arguments. The day uses `%-d` (no padding) rather
/// than `%e`, because `%e` space-pads single-digit days and left a leading
/// space in the output.
pub fn pretty_date(date_time: &DateTime<Utc>) -> ::askama::Result<String> {
    Ok(date_time.format("%-d %B %Y").to_string())
}
/// Askama filter that builds a short description from rendered HTML.
///
/// Keeps at most the first two lines of `body` that start with `<p>` and
/// joins them with a newline. Takes no extra filter arguments.
pub fn description_filter(body: &str) -> ::askama::Result<String> {
    let leading_paragraphs: Vec<&str> = body
        .lines()
        .filter(|candidate| candidate.starts_with("<p>"))
        .take(2)
        .collect();
    // The binding must stay named `description`: `debug!` uses the identifier
    // as the name of the recorded field.
    let description = leading_paragraphs.join("\n");
    debug!(description);
    Ok(description)
}

172
src/filters/markdown.rs Normal file
View File

@ -0,0 +1,172 @@
use std::path::Path;
use image::image_dimensions;
use indoc::formatdoc;
use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
use syntect::{highlighting::ThemeSet, html::highlighted_html_for_string, parsing::SyntaxSet};
use tracing::{debug, error};
use crate::picture_generator::{
picture_markup_generator::generate_picture_markup, resolutions::get_max_resolution,
};
pub const MAX_BLOG_IMAGE_RESOLUTION: (u32, u32) = (1280, 860);
enum TextKind {
Text,
Heading(Option<String>),
Code(String),
}
// pub fn parse_markdown(markdown: &str) -> ::askama::Result<String>
pub fn parse_markdown(markdown: &str) -> ::askama::Result<String> {
let mut options = Options::empty();
options.insert(Options::ENABLE_TABLES);
options.insert(Options::ENABLE_FOOTNOTES);
options.insert(Options::ENABLE_STRIKETHROUGH);
options.insert(Options::ENABLE_TASKLISTS);
options.insert(Options::ENABLE_SMART_PUNCTUATION);
options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
let mut text_kind = TextKind::Text;
let syntax_set = SyntaxSet::load_defaults_newlines();
let theme_set = ThemeSet::load_defaults();
let theme = theme_set.themes.get("InspiredGitHub").unwrap();
let mut heading_ended: Option<bool> = None;
let parser = Parser::new_ext(markdown, options).map(|event| match event {
/*
Parsing images considers `alt` attribute as inner `Text` event
Therefore the `[alt]` is rendered in html as subtitle
and the `[](url "title")` `title` is rendered as `alt` attribute
*/
Event::Start(Tag::Image {
link_type: _,
dest_url,
title,
id: _,
}) => {
if !dest_url.starts_with("/") {
return Event::Html(
formatdoc!(
r#"<img
alt="{title}"
src="{dest_url}"
/>"#
)
.into(),
);
}
let dev_only_img_path =
Path::new("static/").join(dest_url.strip_prefix("/").unwrap_or(&dest_url));
let img_dimensions = image_dimensions(&dev_only_img_path).unwrap();
let (max_width, max_height) = get_max_resolution(
img_dimensions,
MAX_BLOG_IMAGE_RESOLUTION.0,
MAX_BLOG_IMAGE_RESOLUTION.1,
);
// Place image into the content with scaled reso to a boundary
let picture_markup =
generate_picture_markup(&dest_url, max_width, max_height, &title, None, true)
.unwrap_or(formatdoc!(
r#"
<img
alt="{alt}"
src="{src}"
/>"#,
alt = title,
src = dest_url,
));
Event::Html(
formatdoc!(
r#"<figure>
{picture_markup}
<figcaption>
"#,
)
.into(),
)
}
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang))) => {
text_kind = TextKind::Code(lang.to_string());
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang)))
}
Event::Text(text) => match &text_kind {
TextKind::Code(lang) => {
// TODO Check https://github.com/trishume/syntect/pull/535 for typescript support
let lang = if ["ts".to_string(), "typescript".to_string()].contains(lang) {
"javascript"
} else {
lang
};
let syntax_reference = syntax_set
.find_syntax_by_token(lang)
.unwrap_or(syntax_set.find_syntax_plain_text());
let highlighted =
highlighted_html_for_string(&text, &syntax_set, syntax_reference, theme)
.unwrap();
Event::Html(highlighted.into())
}
TextKind::Heading(provided_id) => {
let heading_id = provided_id.clone().unwrap_or({
text.to_lowercase()
.replace(|c: char| !c.is_alphanumeric(), "-")
});
debug!("heading_id: {}", heading_id.clone());
match heading_ended {
None => {
error!("Heading should have set state");
panic!("Heading should have set state");
}
Some(true) => Event::Html(text),
Some(false) => {
heading_ended = Some(true);
Event::Html(
formatdoc!(
r##"id="{heading_id}">
{text}"##
)
.into(),
)
}
}
}
_ => Event::Text(text),
},
Event::Start(Tag::Heading {
level,
id,
classes: _,
attrs: _,
}) => {
let id_str = id.map(|id| id.to_string());
debug!("heading_start: {:?}, level: {}", &id_str, level);
text_kind = TextKind::Heading(id_str);
heading_ended = Some(false);
Event::Html(format!("<{level} ").into())
}
Event::Start(_) => event,
Event::End(TagEnd::Image) => Event::Html("</figcaption></figure>".into()),
Event::End(TagEnd::CodeBlock) => {
text_kind = TextKind::Text;
Event::End(TagEnd::CodeBlock)
}
Event::End(TagEnd::Heading(heading_level)) => {
text_kind = TextKind::Text;
heading_ended = None;
Event::End(TagEnd::Heading(heading_level))
}
_ => event,
});
// Write to String buffer
let mut html = String::new();
pulldown_cmark::html::push_html(&mut html, parser);
// filters::safe(Html, html)
// filters::escape(Text, html)
// Ok(html)
Ok(html)
}

6
src/filters/mod.rs Normal file
View File

@ -0,0 +1,6 @@
mod markdown;
mod pretty_date;
mod truncate_md;
pub use markdown::parse_markdown;
pub use pretty_date::pretty_date;
pub use truncate_md::truncate_md;

View File

@ -0,0 +1,7 @@
use chrono::{DateTime, Utc};
/// Askama filter that formats a UTC timestamp as `day month-name year`,
/// e.g. `7 October 2024`.
///
/// Takes no extra filter arguments. The day uses `%-d` (no padding) rather
/// than `%e`, because `%e` space-pads single-digit days and left a leading
/// space in the output.
pub fn pretty_date(date_time: &DateTime<Utc>) -> ::askama::Result<String> {
    Ok(date_time.format("%-d %B %Y").to_string())
}

View File

@ -0,0 +1,18 @@
/// Line prefixes that disqualify a line from the excerpt built by
/// [`truncate_md`].
const FORBIDDEN_LINES: [&str; 5] = [" ", "#", "-", "!", "<"];

/// Askama filter that builds a short excerpt from a Markdown `body`.
///
/// Keeps the first `rows` lines that are non-empty and do not start with any
/// prefix in [`FORBIDDEN_LINES`], joined with a newline. `rows` is the extra
/// filter argument.
pub fn truncate_md(body: &str, rows: usize) -> ::askama::Result<String> {
    let excerpt_lines: Vec<&str> = body
        .lines()
        .filter(|line| {
            !line.is_empty()
                && FORBIDDEN_LINES
                    .iter()
                    .all(|prefix| !line.starts_with(prefix))
        })
        .take(rows)
        .collect();
    Ok(excerpt_lines.join("\n"))
}

View File

@ -55,6 +55,5 @@ async fn main() {
// - fotos
// THINK deploy to alula rather than katelyn? can be changed whenever
//
// TODO 404 page
// TODO view page transitions
// TODO cookbook

View File

@ -29,7 +29,7 @@ pub async fn render_blog_post(
OriginalUri(original_uri): OriginalUri,
) -> Result<BlogPostTemplate, StatusCode> {
let path = format!("{}/{}.md", BLOG_POST_PATH, post_id);
let parse_post = parse_post::<BlogPostMetadata>(&path, true);
let parse_post = parse_post::<BlogPostMetadata>(&path);
let parsed = parse_post.await?;
let segment = if original_uri.to_string().starts_with("/blog") {
"blog"

View File

@ -5,6 +5,7 @@ pub mod broadcast_list;
pub mod contact;
pub mod index;
pub mod not_found;
pub mod portfolio;
pub mod post_list;
pub mod project_list;
pub mod showcase;

46
src/pages/portfolio.rs Normal file
View File

@ -0,0 +1,46 @@
use askama::Template;
use axum::http::StatusCode;
use serde::Deserialize;
use crate::{
components::site_header::HeaderProps,
filters,
post_utils::{
post_listing::get_post_list,
post_parser::{parse_post, ParseResult},
},
projects::project_model::ProjectMetadata,
};
/// Metadata deserialized for the portfolio page; `render_portfolio` passes
/// this type to `parse_post` when reading `_pages/portfolio.md`.
#[derive(Deserialize, Debug)]
pub struct PortfolioPageModel {
/// Title declared in the page's metadata.
pub title: String,
// TODO work_history
// TODO education
}
/// Askama template data for `portfolio.html`.
#[derive(Template)]
#[template(path = "portfolio.html")]
pub struct PortfolioTemplate {
/// Page title; `render_portfolio` sets it to the literal "Portfolio".
pub title: String,
/// Body of the parsed portfolio page, as returned by `parse_post`.
pub body: String,
/// Projects shown on the page; `render_portfolio` keeps only displayed ones.
pub project_list: Vec<ParseResult<ProjectMetadata>>,
/// Properties for the site header component.
pub header_props: HeaderProps,
}
/// Handler that builds the portfolio page.
///
/// Reads `_pages/portfolio.md` for the page body and lists the posts under
/// `_projects`, keeping only those whose metadata marks them as displayed,
/// ordered by slug in descending order.
///
/// # Errors
///
/// Propagates the `StatusCode` returned by `parse_post` or `get_post_list`.
pub async fn render_portfolio() -> Result<PortfolioTemplate, StatusCode> {
    let page = parse_post::<PortfolioPageModel>("_pages/portfolio.md").await?;

    let mut projects = get_post_list::<ProjectMetadata>("_projects").await?;
    // Filtering first is equivalent because the sort below is stable, and it
    // leaves fewer entries to sort.
    projects.retain(|project| project.metadata.displayed);
    projects.sort_by_cached_key(|project| project.slug.to_string());
    projects.reverse();

    Ok(PortfolioTemplate {
        title: String::from("Portfolio"),
        body: page.body,
        header_props: HeaderProps::default(),
        project_list: projects,
    })
}

View File

@ -3,6 +3,7 @@ use axum::http::StatusCode;
use crate::{
components::site_header::HeaderProps,
filters,
post_utils::{post_listing::get_post_list, post_parser::ParseResult},
projects::project_model::ProjectMetadata,
};

View File

@ -21,7 +21,7 @@ pub async fn get_post_list<'de, Metadata: DeserializeOwned>(
let file_path = file.path();
let file_path_str = file_path.to_str().unwrap();
info!(":{}", file_path_str);
let post = parse_post::<Metadata>(file_path_str, false).await?;
let post = parse_post::<Metadata>(file_path_str).await?;
posts.push(post);
}

View File

@ -1,22 +1,10 @@
use core::panic;
use std::path::Path;
use axum::http::StatusCode;
use chrono::{DateTime, Utc};
use gray_matter::{engine::YAML, Matter};
use image::image_dimensions;
use indoc::formatdoc;
use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag, TagEnd};
use serde::{de::DeserializeOwned, Deserialize, Deserializer};
use syntect::{highlighting::ThemeSet, html::highlighted_html_for_string, parsing::SyntaxSet};
use tokio::fs;
use tracing::{debug, error};
use crate::picture_generator::{
picture_markup_generator::generate_picture_markup, resolutions::get_max_resolution,
};
pub const MAX_BLOG_IMAGE_RESOLUTION: (u32, u32) = (1280, 860);
pub fn deserialize_date<'de, D>(deserializer: D) -> Result<DateTime<Utc>, D::Error>
where
@ -41,7 +29,6 @@ pub struct ParseResult<Metadata> {
pub async fn parse_post<'de, Metadata: DeserializeOwned>(
path: &str,
generate_images: bool,
) -> Result<ParseResult<Metadata>, StatusCode> {
let file_contents = fs::read_to_string(path)
.await
@ -56,8 +43,6 @@ pub async fn parse_post<'de, Metadata: DeserializeOwned>(
StatusCode::INTERNAL_SERVER_ERROR
})?;
let body = parse_html(&metadata.content, generate_images);
let filename = Path::new(path)
.file_stem()
.ok_or(StatusCode::INTERNAL_SERVER_ERROR)?
@ -66,173 +51,8 @@ pub async fn parse_post<'de, Metadata: DeserializeOwned>(
.to_owned();
Ok(ParseResult {
body,
body: metadata.content,
metadata: metadata.data,
slug: filename,
})
}
enum TextKind {
Text,
Heading(Option<String>),
Code(String),
}
pub fn parse_html(markdown: &str, generate_images: bool) -> String {
let mut options = Options::empty();
options.insert(Options::ENABLE_TABLES);
options.insert(Options::ENABLE_FOOTNOTES);
options.insert(Options::ENABLE_STRIKETHROUGH);
options.insert(Options::ENABLE_TASKLISTS);
options.insert(Options::ENABLE_SMART_PUNCTUATION);
options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
let mut text_kind = TextKind::Text;
let syntax_set = SyntaxSet::load_defaults_newlines();
let theme_set = ThemeSet::load_defaults();
let theme = theme_set.themes.get("InspiredGitHub").unwrap();
let mut heading_ended: Option<bool> = None;
let parser = Parser::new_ext(markdown, options).map(|event| match event {
/*
Parsing images considers `alt` attribute as inner `Text` event
Therefore the `[alt]` is rendered in html as subtitle
and the `[](url "title")` `title` is rendered as `alt` attribute
*/
Event::Start(Tag::Image {
link_type,
dest_url,
title,
id,
}) => {
if !dest_url.starts_with("/") {
return Event::Html(
formatdoc!(
r#"<img
alt="{title}"
src="{dest_url}"
/>"#
)
.into(),
);
}
let dev_only_img_path =
Path::new("static/").join(dest_url.strip_prefix("/").unwrap_or(&dest_url));
let img_dimensions = image_dimensions(&dev_only_img_path).unwrap();
let (max_width, max_height) = get_max_resolution(
img_dimensions,
MAX_BLOG_IMAGE_RESOLUTION.0,
MAX_BLOG_IMAGE_RESOLUTION.1,
);
// Place image into the content with scaled reso to a boundary
let picture_markup = generate_picture_markup(
&dest_url,
max_width,
max_height,
&title,
None,
generate_images,
)
.unwrap_or(formatdoc!(
r#"
<img
alt="{alt}"
src="{src}"
/>"#,
alt = title,
src = dest_url,
));
debug!(
"Image link_type: {:?} url: {} title: {} id: {}",
link_type, dest_url, title, id
);
Event::Html(
formatdoc!(
r#"<figure>
{picture_markup}
<figcaption>
"#,
)
.into(),
)
}
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang))) => {
text_kind = TextKind::Code(lang.to_string());
Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(lang)))
}
Event::Text(text) => match &text_kind {
TextKind::Code(lang) => {
// TODO Check https://github.com/trishume/syntect/pull/535 for typescript support
let lang = if ["ts".to_string(), "typescript".to_string()].contains(lang) {
"javascript"
} else {
lang
};
let syntax_reference = syntax_set
.find_syntax_by_token(lang)
.unwrap_or(syntax_set.find_syntax_plain_text());
let highlighted =
highlighted_html_for_string(&text, &syntax_set, syntax_reference, theme)
.unwrap();
Event::Html(highlighted.into())
}
TextKind::Heading(provided_id) => {
let heading_id = provided_id.clone().unwrap_or({
text.to_lowercase()
.replace(|c: char| !c.is_alphanumeric(), "-")
});
debug!("heading_id: {}", heading_id.clone());
match heading_ended {
None => {
error!("Heading should have set state");
panic!("Heading should have set state");
}
Some(true) => Event::Html(text),
Some(false) => {
heading_ended = Some(true);
Event::Html(
formatdoc!(
r##"id="{heading_id}">
{text}"##
)
.into(),
)
}
}
}
_ => Event::Text(text),
},
Event::Start(Tag::Heading {
level,
id,
classes: _,
attrs: _,
}) => {
let id_str = id.map(|id| id.to_string());
debug!("heading_start: {:?}, level: {}", &id_str, level);
text_kind = TextKind::Heading(id_str);
heading_ended = Some(false);
Event::Html(format!("<{level} ").into())
}
Event::Start(_) => event,
Event::End(TagEnd::Image) => Event::Html("</figcaption></figure>".into()),
Event::End(TagEnd::CodeBlock) => {
text_kind = TextKind::Text;
Event::End(TagEnd::CodeBlock)
}
Event::End(TagEnd::Heading(heading_level)) => {
text_kind = TextKind::Text;
heading_ended = None;
Event::End(TagEnd::Heading(heading_level))
}
_ => event,
});
// Write to String buffer
let mut html = String::new();
pulldown_cmark::html::push_html(&mut html, parser);
html
}

View File

@ -4,7 +4,8 @@ use crate::{
admin::render_admin, blog_post_list::render_blog_post_list,
blog_post_page::render_blog_post, broadcast_list::render_broadcast_post_list,
contact::render_contact, index::render_index, not_found::render_not_found,
project_list::render_projects_list, showcase::egg_fetcher::render_egg_fetcher,
portfolio::render_portfolio, project_list::render_projects_list,
showcase::egg_fetcher::render_egg_fetcher,
},
};
use axum::{extract::MatchedPath, http::Request, routing::get, Router};
@ -23,6 +24,7 @@ pub fn get_router() -> Router {
.route("/contact", get(render_contact))
.route("/showcase", get(render_projects_list))
.route("/showcase/:project_slug", get(render_egg_fetcher))
.route("/portfolio", get(render_portfolio))
.route("/admin", get(render_admin))
.route("/feed.xml", get(render_rss_feed))
.layer(