// ys1r/markdown.rs

/// Extracts the language label from a Markdown-style code block.
///
/// This function looks for the first occurrence of a triple backtick ("```"),
/// then captures everything from immediately after it up to the next newline.
/// Surrounding whitespace is trimmed from the captured text, so a Windows line
/// ending (`\r\n`) or a space after the backticks does not leak into the label.
/// The result is returned as the language label (e.g., "rust", "python").
///
/// # Arguments
/// * `text` - A string slice that may contain a Markdown code block.
///
/// # Returns
/// * `Some(String)` containing the trimmed language label if a code block is
///   found and a newline follows the opening backticks. The label is empty
///   when the fence carries no language.
/// * `None` if no opening backticks or newline is found.
///
/// # Example
/// ```
/// let input = "Here is code:\n```rust\nfn main() {}\n```";
/// let lang = ys1r::markdown::extract_lang_label(input);
/// assert_eq!(lang, Some("rust".to_string()));
/// ```
pub fn extract_lang_label(text: &str) -> Option<String> {
    // Everything after the first opening fence.
    let (_, after_fence) = text.split_once("```")?;
    // The info string is the remainder of the fence line; a fence with no
    // newline after it is not treated as a block opener.
    let (info, _) = after_fence.split_once('\n')?;
    // Trim so that "```rust\r\n" and "``` rust\n" both yield "rust".
    Some(info.trim().to_string())
}
26
/// Extracts the first fenced Markdown code block from the input string.
///
/// The block is the text between the first triple backtick and the next
/// triple backtick that follows it. If that text contains a newline,
/// everything up to and including the first newline is treated as the fence
/// line (for example a language identifier such as `rust`) and is dropped.
/// If it contains no newline, the whole text is returned as-is.
///
/// The closing fence is located *before* the fence line is stripped, so a
/// newline that appears after the closing fence never causes text outside the
/// first block to be returned.
///
/// # Arguments
///
/// * `input` - The input string that may contain a fenced code block.
///
/// # Returns
///
/// * `Some(String)` containing the inner contents of the first code block
/// * `None` if no opening fence is found, or the opening fence is never closed
///
/// # Example
/// ```
/// let input = "Here is code:\n```rust\nfn main() {}\n```";
/// let code = ys1r::markdown::extract_code_block(input);
/// assert_eq!(code, Some("fn main() {}\n".to_string()));
/// ```
pub fn extract_code_block(input: &str) -> Option<String> {
    // Text following the first opening fence.
    let (_, after_open) = input.split_once("```")?;
    // Bound the block by its closing fence first; an unclosed block is `None`.
    let (block, _) = after_open.split_once("```")?;
    // Only a newline *inside* the block marks the end of the fence line.
    let content = match block.split_once('\n') {
        Some((_info, body)) => body,
        None => block,
    };
    Some(content.to_string())
}
52
53/// Returns the contents of the first fenced Markdown code block if present,
54/// otherwise returns the original input unchanged.
55///
56/// This is a convenience wrapper around [`extract_code_block`] that guarantees
57/// a `String` result, making it useful when a fallback value is required.
58///
59/// # Arguments
60///
61/// * `input` - The input string that may contain a fenced code block.
62///
63/// # Returns
64///
65/// * The extracted code block contents if found
66/// * Otherwise, the original input converted to a `String`
67pub fn extract_code_block_or_original(input: &str) -> String {
68    extract_code_block(input).unwrap_or_else(|| input.to_string())
69}
70
#[cfg(test)]
mod tests {
    use super::*;

    // ---- extract_code_block ------------------------------------------------

    #[test]
    fn extracts_simple_code_block() {
        assert_eq!(
            extract_code_block("```\nhello\n```").as_deref(),
            Some("hello\n")
        );
    }

    #[test]
    fn extracts_code_block_with_language() {
        assert_eq!(
            extract_code_block("```rust\nlet x = 42;\n```").as_deref(),
            Some("let x = 42;\n")
        );
    }

    #[test]
    fn extracts_first_code_block_only() {
        let two_blocks = "```\nfirst\n```\n```\nsecond\n```";
        assert_eq!(extract_code_block(two_blocks).as_deref(), Some("first\n"));
    }

    #[test]
    fn returns_none_when_no_code_block() {
        assert!(extract_code_block("no code block here").is_none());
    }

    #[test]
    fn handles_unclosed_code_block() {
        assert!(extract_code_block("```\nunclosed").is_none());
    }

    // ---- extract_code_block_or_original ------------------------------------

    #[test]
    fn returns_original_when_no_code_block() {
        let plain = "no code block here";
        assert_eq!(extract_code_block_or_original(plain), plain);
    }

    #[test]
    fn returns_extracted_code_when_present() {
        let surrounded = "text before\n```\ncode\n```\ntext after";
        assert_eq!(extract_code_block_or_original(surrounded), "code\n");
    }

    // ---- extract_lang_label ------------------------------------------------

    #[test]
    fn extracts_rust_label() {
        assert_eq!(
            extract_lang_label("```rust\nfn main() {}\n```").as_deref(),
            Some("rust")
        );
    }

    #[test]
    fn extracts_python_label() {
        assert_eq!(
            extract_lang_label("Some text\n```python\nprint('hi')\n```").as_deref(),
            Some("python")
        );
    }

    #[test]
    fn returns_empty_string_if_no_label() {
        assert_eq!(
            extract_lang_label("```\ncode block\n```").as_deref(),
            Some("")
        );
    }

    #[test]
    fn returns_none_if_no_backticks() {
        assert!(extract_lang_label("no code block here").is_none());
    }

    #[test]
    fn returns_none_if_no_newline_after_backticks() {
        assert!(extract_lang_label("```rust fn main() {}```").is_none());
    }

    #[test]
    fn only_extracts_first_block() {
        let two_blocks = "```rust\nfn main() {}\n```\n```python\nprint()\n```";
        assert_eq!(extract_lang_label(two_blocks).as_deref(), Some("rust"));
    }
}