Note: This site is currently "Under construction". I'm migrating to a new version of my site building software. Lots of things are in a state of disrepair as a result (for example, footnote links aren't working). It's all part of the process of building in public. Most things should still be readable though.

Neopolitan nom Parsing Playground Scratchpad

This is a holding spot for code while I get it working.

This version does basic text splitting on multiple line breaks, but it does so by looking at the input before it gets parsed by the many1 parsers.

The versions nearest the top are the most recent ones.

Code

#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(dead_code)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::not_line_ending;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::delimited;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;

/// Placeholder entry point: prints a reminder that this file is meant to be
/// exercised through its test suite.
fn main() {
    let reminder = "Run tests instead of running main";
    println!("{}", reminder);
}

/// Parses a run of one or more attributes, each introduced by a `--` or `|`
/// marker and then handed to `attr_id`.
///
/// Yields `Some(list)` when at least one attribute parses, and `None` (with
/// the input left untouched) when the input does not start with one.
fn attributes(source: &str) -> IResult<&str, Option<Vec<Attribute>>> {
    let marker = alt((tag("--"), tag("|")));
    opt(many1(preceded(marker, attr_id)))(source)
}

fn attr_id(source: &str) -> IResult<&str, Attribute> {
    let (source, _) = space0(source)?;
    let (source, attr) = preceded(tag("id: "), is_not("|>\n"))(source)?;
    Ok((
        source,
        Attribute::Id {
            value: attr.to_string(),
        },
    ))
}

fn aside(source: &str) -> IResult<&str, Section> {
    let (source, _) = tag_no_case("-- aside")(source)?;
    let (source, _) = space0(source)?;
    let (source, _) = line_ending(source)?;
    let (source, attributes) = attributes(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::Aside {
            attributes,
            content,
        },
    ))
}

fn list(source: &str) -> IResult<&str, Section> {
    let (source, _) = tag_no_case("-- list")(source)?;
    let (source, _) = space0(source)?;
    let (source, _) = line_ending(source)?;
    let (source, attributes) = attributes(source)?;
    let preface = None;
    let (source, items) = opt(many1(preceded(multispace0, list_item)))(source)?;
    Ok((
        source,
        Section::List {
            attributes,
            items,
            preface,
        },
    ))
}

/// Parses one list item: one or more `- `-prefixed paragraphs collected into a
/// single `Container::ListItem`.
///
/// NOTE(review): `paragraph` consumes single line endings, so consecutive
/// `- ` lines without a blank line between them appear to be folded into the
/// first item's paragraph as plain text — confirm whether that is intended.
fn list_item(source: &str) -> IResult<&str, Container> {
    many1(preceded(tag("- "), paragraph))(source)
        .map(|(rest, content)| (rest, Container::ListItem { content }))
}

/// Top-level parser: reads one or more sections, skipping any whitespace in
/// front of each. Section kinds are tried in the order aside, list, p, title.
///
/// Fails if not even one section can be parsed.
fn sections(source: &str) -> IResult<&str, Vec<Section>> {
    let any_section = alt((aside, list, p, title));
    many1(preceded(multispace0, any_section))(source)
}

/// Parses an inline strong snippet of the form `<<strong|text>>` or
/// `<<strong|text|id: value>>` (the opening tag is matched case-insensitively).
///
/// The text runs up to the first `|` or `>` and must be non-empty. Any
/// attributes that follow are parsed by `attributes`; the snippet must then be
/// closed by `>>`.
fn snippet_strong(source: &str) -> IResult<&str, Snippet> {
    let (source, _) = tag_no_case("<<strong|")(source)?;
    let (source, content) = is_not("|>")(source)?;
    // Debug tracing (`dbg!`) that used to surround this call was removed so the
    // parser no longer writes to stderr on every strong snippet.
    let (source, attributes) = attributes(source)?;
    let (source, _) = tag(">>")(source)?;
    Ok((
        source,
        Snippet::Strong {
            text: content.to_string(),
            attributes,
        },
    ))
}

/// Parses a run of plain text: everything up to (not including) the next
/// newline or `<`. Fails when that run would be empty.
fn snippet_text(source: &str) -> IResult<&str, Snippet> {
    is_not("\n<")(source).map(|(rest, run)| {
        let text = run.to_string();
        (rest, Snippet::Text { text })
    })
}

fn p(source: &str) -> IResult<&str, Section> {
    let (source, _) = tag_no_case("-- p")(source)?;
    let (source, _) = space0(source)?;
    let (source, _) = line_ending(source)?;
    let (source, attributes) = attributes(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::P {
            attributes,
            content,
        },
    ))
}

/// Parses one or more paragraphs, skipping any whitespace (including blank
/// lines) in front of each.
///
/// Yields `None`, consuming nothing, when no paragraph is present.
fn paragraphs(source: &str) -> IResult<&str, Option<Vec<Block>>> {
    let one_paragraph = preceded(multispace0, paragraph);
    opt(many1(one_paragraph))(source)
}

/// Parses a single paragraph as a sequence of text and strong snippets.
///
/// A paragraph may not start with `--` (that marks a section or attribute
/// line). Each snippet may be preceded by at most one line ending, so a blank
/// line ends the paragraph.
fn paragraph(source: &str) -> IResult<&str, Block> {
    // Negative look-ahead only: `not` consumes nothing, so parsing continues
    // from the same `source`.
    not(tag("--"))(source)?;
    let snippet = alt((snippet_text, snippet_strong));
    let (rest, snippets) = many1(preceded(opt(line_ending), snippet))(source)?;
    Ok((rest, Block::Paragraph { snippets }))
}

fn title(source: &str) -> IResult<&str, Section> {
    let (source, _) = tag_no_case("-- title")(source)?;
    let (source, _) = space0(source)?;
    let (source, _) = line_ending(source)?;
    let (source, attributes) = attributes(source)?;
    let (source, headline) = opt(paragraph)(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::Title {
            attributes,
            headline,
            content,
        },
    ))
}

/// An attribute attached to a section or an inline snippet.
#[derive(Debug, PartialEq)]
enum Attribute {
    /// An `id: <value>` attribute; `value` holds the text after `id: `.
    Id { value: String },
    /// Placeholder variant; none of the parsers in this version construct it.
    None,
}

/// A top-level document section, one variant per `-- <name>` header that the
/// `sections` parser recognizes.
#[derive(Debug, PartialEq)]
enum Section {
    /// An `-- aside` section.
    Aside {
        attributes: Option<Vec<Attribute>>,
        content: Option<Vec<Block>>,
    },
    /// A `-- list` section. The `list` parser always sets `preface` to `None`.
    List {
        attributes: Option<Vec<Attribute>>,
        preface: Option<Vec<Block>>,
        items: Option<Vec<Container>>,
    },
    /// A `-- p` section.
    P {
        attributes: Option<Vec<Attribute>>,
        content: Option<Vec<Block>>,
    },
    /// A `-- title` section: the first paragraph is the headline, any further
    /// paragraphs are the content.
    Title {
        attributes: Option<Vec<Attribute>>,
        headline: Option<Block>,
        content: Option<Vec<Block>>,
    },
}

/// A block-level element inside a section.
#[derive(Debug, PartialEq)]
enum Block {
    /// A paragraph made of one or more inline snippets.
    Paragraph { snippets: Vec<Snippet> },
    /// Placeholder variant; none of the parsers in this version construct it.
    None,
}

/// A grouping of blocks inside a section.
#[derive(Debug, PartialEq)]
enum Container {
    /// One item of a list section, holding the paragraphs parsed for it.
    ListItem { content: Vec<Block> },
    /// Placeholder variant; none of the parsers in this version construct it.
    None,
}

/// An inline piece of a paragraph.
#[derive(Debug, PartialEq)]
enum Snippet {
    /// A run of plain text.
    Text {
        text: String,
    },
    /// A `<<strong|...>>` snippet with its text and optional attributes.
    Strong {
        text: String,
        attributes: Option<Vec<Attribute>>,
    },
    /// Placeholder variant; none of the parsers in this version construct it.
    None,
}

#[cfg(test)]

mod test {

    use super::*;

    /// End-to-end check: in a title section the first paragraph becomes the
    /// headline and the remaining paragraphs (one with a strong snippet)
    /// become the content.
    #[test]
    fn integration_alfa() {
        let plain = |s: &str| Snippet::Text { text: s.to_string() };
        let para = |snippets: Vec<Snippet>| Block::Paragraph { snippets };
        let input = r#"

-- title

alfa bravo

charlie <<strong|delta>>

echo foxtrot
"#;
        let want = vec![Section::Title {
            attributes: None,
            headline: Some(para(vec![plain("alfa bravo")])),
            content: Some(vec![
                para(vec![
                    plain("charlie "),
                    Snippet::Strong {
                        attributes: None,
                        text: "delta".to_string(),
                    },
                ]),
                para(vec![plain("echo foxtrot")]),
            ]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// An aside with one plain-text paragraph and no attributes.
    #[test]
    fn aside_basic() {
        let plain = |s: &str| Snippet::Text { text: s.to_string() };
        let input = "-- aside\n\nalfa bravo";
        let want = vec![Section::Aside {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![plain("alfa bravo")],
            }]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// An aside whose header is followed by a `-- id:` attribute line.
    #[test]
    fn aside_with_attributes() {
        let plain = |s: &str| Snippet::Text { text: s.to_string() };
        let input = "-- aside\n-- id: charlie\n\nalfa bravo";
        let id = Attribute::Id {
            value: "charlie".to_string(),
        };
        let want = vec![Section::Aside {
            attributes: Some(vec![id]),
            content: Some(vec![Block::Paragraph {
                snippets: vec![plain("alfa bravo")],
            }]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// A strong snippet in the middle of a paragraph splits the surrounding
    /// text into separate text snippets.
    #[test]
    fn aside_with_strong() {
        let plain = |s: &str| Snippet::Text { text: s.to_string() };
        let input = "-- aside\n\nalfa <<strong|bravo>> charlie";
        let snippets = vec![
            plain("alfa "),
            Snippet::Strong {
                text: "bravo".to_string(),
                attributes: None,
            },
            plain(" charlie"),
        ];
        let want = vec![Section::Aside {
            attributes: None,
            content: Some(vec![Block::Paragraph { snippets }]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// Blank lines separate an aside's content into three paragraphs, the
    /// second of which starts with a strong snippet.
    #[test]
    fn aside_multiple_paragraphs() {
        let plain = |s: &str| Snippet::Text { text: s.to_string() };
        let para = |snippets: Vec<Snippet>| Block::Paragraph { snippets };
        let input = "-- aside\n\nalfa bravo\n\n<<strong|charlie>> delta\n\necho foxtrot";
        let want = vec![Section::Aside {
            attributes: None,
            content: Some(vec![
                para(vec![plain("alfa bravo")]),
                para(vec![
                    Snippet::Strong {
                        text: "charlie".to_string(),
                        attributes: None,
                    },
                    plain(" delta"),
                ]),
                para(vec![plain("echo foxtrot")]),
            ]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// Two consecutive aside sections: the first carries an id attribute, the
    /// second has three paragraphs.
    #[test]
    fn multiple_sections() {
        let plain = |s: &str| Snippet::Text { text: s.to_string() };
        let para = |snippets: Vec<Snippet>| Block::Paragraph { snippets };
        let input = r#"


-- aside
-- id: widget

sierra tango

-- aside

alfa bravo

<<strong|charlie>> delta

echo foxtrot
"#;
        let first = Section::Aside {
            attributes: Some(vec![Attribute::Id {
                value: "widget".to_string(),
            }]),
            content: Some(vec![para(vec![plain("sierra tango")])]),
        };
        let second = Section::Aside {
            attributes: None,
            content: Some(vec![
                para(vec![plain("alfa bravo")]),
                para(vec![
                    Snippet::Strong {
                        text: "charlie".to_string(),
                        attributes: None,
                    },
                    plain(" delta"),
                ]),
                para(vec![plain("echo foxtrot")]),
            ]),
        };
        let want = vec![first, second];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// An aside followed by a list section containing a single item.
    #[test]
    fn aside_list() {
        let plain = |s: &str| Snippet::Text { text: s.to_string() };
        let para = |snippets: Vec<Snippet>| Block::Paragraph { snippets };
        let input = r#"

-- aside

alfa bravo

-- list

- charlie
"#;
        let aside_section = Section::Aside {
            attributes: None,
            content: Some(vec![para(vec![plain("alfa bravo")])]),
        };
        let list_section = Section::List {
            attributes: None,
            items: Some(vec![Container::ListItem {
                content: vec![para(vec![plain("charlie")])],
            }]),
            preface: None,
        };
        let want = vec![aside_section, list_section];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// A title with only a headline paragraph yields `content: None`.
    #[test]
    fn title_basic() {
        let input = r#"

-- title

alfa bravo
"#;
        let headline = Block::Paragraph {
            snippets: vec![Snippet::Text {
                text: "alfa bravo".to_string(),
            }],
        };
        let want = vec![Section::Title {
            attributes: None,
            headline: Some(headline),
            content: None,
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// A title whose headline is followed by two content paragraphs.
    #[test]
    fn title_with_content() {
        let para = |s: &str| Block::Paragraph {
            snippets: vec![Snippet::Text { text: s.to_string() }],
        };
        let input = r#"

-- title

alfa bravo

charlie delta

echo foxtrot
"#;
        let want = vec![Section::Title {
            attributes: None,
            headline: Some(para("alfa bravo")),
            content: Some(vec![para("charlie delta"), para("echo foxtrot")]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// Calls `snippet_strong` directly on a snippet carrying an inline
    /// `|id: ...` attribute.
    #[test]
    fn strong_with_attributes() {
        let input = r#"<<strong|bravo|id: tango>>"#;
        let id = Attribute::Id {
            value: "tango".to_string(),
        };
        let want = Snippet::Strong {
            attributes: Some(vec![id]),
            text: "bravo".to_string(),
        };
        let (_, got) = snippet_strong(input).unwrap();
        assert_eq!(want, got);
    }

    /// A strong snippet with an inline id attribute sitting between two text
    /// runs inside a `-- p` section.
    #[test]
    fn strong_text_with_attributes_in_p() {
        let plain = |s: &str| Snippet::Text { text: s.to_string() };
        let input = "-- p\n\nalfa <<strong|bravo|id: tango>> charlie";
        let strong = Snippet::Strong {
            attributes: Some(vec![Attribute::Id {
                value: "tango".to_string(),
            }]),
            text: "bravo".to_string(),
        };
        let want = vec![Section::P {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![plain("alfa "), strong, plain(" charlie")],
            }]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// A strong snippet with an inline id attribute as the first snippet of a
    /// paragraph, followed by plain text.
    #[test]
    fn inline_attribute_first_item_of_paragraph() {
        let input = "-- p\n\n<<strong|foxtrot echo|id: delta>> bravo";
        let strong = Snippet::Strong {
            attributes: Some(vec![Attribute::Id {
                value: "delta".to_string(),
            }]),
            text: "foxtrot echo".to_string(),
        };
        let trailing = Snippet::Text {
            text: " bravo".to_string(),
        };
        let want = vec![Section::P {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![strong, trailing],
            }]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// A strong snippet with an inline id attribute as the final snippet of a
    /// paragraph, preceded by plain text.
    ///
    /// NOTE(review): the name says `list_item`, but the input exercises the
    /// last snippet of a paragraph — possibly meant `last_item`.
    #[test]
    fn inline_attribute_list_item_of_paragraph() {
        let input = "-- p\n\ntango <<strong|sierra alfa|id: delta>>";
        let leading = Snippet::Text {
            text: "tango ".to_string(),
        };
        let strong = Snippet::Strong {
            attributes: Some(vec![Attribute::Id {
                value: "delta".to_string(),
            }]),
            text: "sierra alfa".to_string(),
        };
        let want = vec![Section::P {
            attributes: None,
            content: Some(vec![Block::Paragraph {
                snippets: vec![leading, strong],
            }]),
        }];
        let (_, got) = sections(input).unwrap();
        assert_eq!(want, got);
    }

    //
}

-- code
-- rust


#![allow(unused_imports)]
#![allow(unused_variables)]
#![allow(dead_code)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::tag_no_case;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::not_line_ending;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::delimited;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;

/// Entry point. The body is intentionally empty: the sample `get_sections`
/// call below is commented out, so running the program does nothing.
fn main() {
    // dbg!(get_sections(
    //     "-- ALFA\n\nasdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n"
    // )
    // .unwrap());
}

/// Parses one or more consecutive `-- id: ...` attribute lines.
///
/// Yields `None`, consuming nothing, when the input does not start with one.
fn attributes(source: &str) -> IResult<&str, Option<Vec<Attribute>>> {
    opt(many1(id_attr))(source)
}

fn id_attr(source: &str) -> IResult<&str, Attribute> {
    let (source, attr) = delimited(tag("-- id: "), not_line_ending, line_ending)(source)?;
    Ok((source, Attribute::Id(attr.to_string())))
}

/// Top-level parser: reads one or more sections (aside or bravo), skipping any
/// whitespace in front of each. Fails if no section can be parsed.
fn get_sections(source: &str) -> IResult<&str, Vec<Section>> {
    let any_section = alt((aside_section, section_bravo));
    many1(preceded(multispace0, any_section))(source)
}

fn aside_section(source: &str) -> IResult<&str, Section> {
    let (source, _) = tuple((tag_no_case("-- aside"), space0, line_ending))(source)?;
    let (source, attrs) = attributes(source)?;
    let (source, paragraphs) = paragraphs(source)?;
    Ok((
        source,
        Section::Aside {
            attrs,
            content: paragraphs,
        },
    ))
}

/// Parses a `-- bravo` section: the header line (case-insensitive, trailing
/// spaces allowed) followed by optional paragraphs.
///
/// The parsed paragraphs are stored in `content` rather than being dropped.
/// Attributes are not parsed for this section kind, so `attrs` is always
/// `None`.
fn section_bravo(source: &str) -> IResult<&str, Section> {
    let (source, _) = tuple((tag_no_case("-- bravo"), space0, line_ending))(source)?;
    let (source, content) = paragraphs(source)?;
    Ok((
        source,
        Section::Bravo {
            attrs: None,
            content,
        },
    ))
}

/// Parses one or more paragraphs, skipping any whitespace in front of each.
///
/// Yields `None`, consuming nothing, when no paragraph is present.
fn paragraphs(source: &str) -> IResult<&str, Option<Vec<Block>>> {
    let one_paragraph = preceded(multispace0, paragraph);
    opt(many1(one_paragraph))(source)
}

/// Parses a single paragraph as one or more lines of text.
///
/// A paragraph may not start with `--` (that marks a section or attribute
/// line). Each line may be preceded by at most one line ending, so a blank
/// line ends the paragraph.
fn paragraph(source: &str) -> IResult<&str, Block> {
    // `not` is a look-ahead only; it consumes no input.
    let (source, _) = not(tag("--"))(source)?;
    let (source, snippets) = many1(preceded(opt(line_ending), text_content))(source)?;
    Ok((source, Block::Paragraph(snippets)))
}

/// Parses the text up to (not including) the next newline into a
/// `Snippet::Text`. Fails when that text would be empty.
fn text_content(source: &str) -> IResult<&str, Snippet> {
    is_not("\n")(source).map(|(rest, line)| (rest, Snippet::Text(line.to_string())))
}

/// A top-level document section recognized by `get_sections`.
#[derive(Debug, PartialEq)]
enum Section {
    /// An `-- aside` section with its optional attributes and paragraphs.
    Aside {
        attrs: Option<Vec<Attribute>>,
        content: Option<Vec<Block>>,
    },
    /// A `-- bravo` section.
    Bravo {
        attrs: Option<Vec<Attribute>>,
        content: Option<Vec<Block>>,
    },
}

/// An attribute attached to a section.
#[derive(Debug, PartialEq)]
enum Attribute {
    /// The value of a `-- id: <value>` line.
    Id(String),
    /// Placeholder variant; none of the parsers in this version construct it.
    None,
}

/// A block-level element inside a section.
#[derive(Debug, PartialEq)]
enum Block {
    /// A paragraph made of one or more snippets.
    Paragraph(Vec<Snippet>),
    /// Placeholder variant; none of the parsers in this version construct it.
    None,
}

/// An inline piece of a paragraph.
#[derive(Debug, PartialEq)]
enum Snippet {
    /// A run of plain text (one line, as produced by `text_content`).
    Text(String),
    /// Placeholder variant; none of the parsers in this version construct it.
    None,
}

#[cfg(test)]

mod test {

    use super::*;

    /// An aside with one plain-text paragraph and no attributes.
    #[test]
    fn aside_basic() {
        let input = "-- aside\n\nalfa bravo";
        let paragraph = Block::Paragraph(vec![Snippet::Text("alfa bravo".to_string())]);
        let want = vec![Section::Aside {
            attrs: None,
            content: Some(vec![paragraph]),
        }];
        let (_, got) = get_sections(input).unwrap();
        assert_eq!(want, got);
    }

    /// An aside whose header is followed by a `-- id:` attribute line.
    #[test]
    fn aside_with_attributes() {
        let input = "-- aside\n-- id: charlie\n\nalfa bravo";
        let paragraph = Block::Paragraph(vec![Snippet::Text("alfa bravo".to_string())]);
        let want = vec![Section::Aside {
            attrs: Some(vec![Attribute::Id("charlie".to_string())]),
            content: Some(vec![paragraph]),
        }];
        let (_, got) = get_sections(input).unwrap();
        assert_eq!(want, got);
    }
}

-- code
-- rust

#![allow(unused_imports)]
#![allow(unused_variables)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::not;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many0;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;

/// Runs `get_sections` on a hard-coded sample containing an ALFA and a BRAVO
/// section and debug-prints the parse result. Panics (via `unwrap`) if parsing
/// returns an error.
fn main() {
    dbg!(get_sections(
        "-- ALFA\n\nasdf asdf\n\nwer\n\nwer\n\n-- BRAVO\n\nyiyiyi yiyi\n\nwkwkwkwk\n\n"
    )
    .unwrap());
}

/// Top-level parser: reads zero or more sections (ALFA or BRAVO), skipping any
/// whitespace in front of each.
fn get_sections(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let any_section = alt((section_alfa, section_bravo));
    many0(preceded(multispace0, any_section))(source)
}

fn section_alfa(source: &str) -> IResult<&str, Vec<&str>> {
    let (source, x) = tuple((tag("-- ALFA"), space0, line_ending))(source)?;
    let (source, p) = paragraphs(source)?;
    dbg!("ALFA");
    Ok((source, vec![]))
}

fn section_bravo(source: &str) -> IResult<&str, Vec<&str>> {
    let (source, x) = tuple((tag("-- BRAVO"), space0, line_ending))(source)?;
    let (source, p) = paragraphs(source)?;
    dbg!("BRAVO");
    Ok((source, vec![]))
}

/// Parses one or more paragraphs, skipping any whitespace in front of each.
/// Fails if no paragraph is present.
fn paragraphs(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let one_paragraph = preceded(multispace0, paragraph);
    many1(one_paragraph)(source)
}

/// Parses a single paragraph as one or more lines of text.
///
/// A paragraph may not start with `--` (that marks a section header). Each
/// line may be preceded by at most one line ending, so a blank line ends the
/// paragraph.
fn paragraph(source: &str) -> IResult<&str, Vec<&str>> {
    // `not` is a look-ahead only; it consumes no input.
    let (source, _) = not(tag("--"))(source)?;
    let (source, lines) = many1(preceded(opt(line_ending), text_content))(source)?;
    Ok((source, lines))
}

/// Returns the text up to (not including) the next newline. Fails when that
/// text would be empty.
fn text_content(source: &str) -> IResult<&str, &str> {
    is_not("\n")(source)
}



-- code
-- rust


#![allow(unused_imports)]
use nom::branch::alt;
use nom::bytes::complete::is_not;
use nom::bytes::complete::tag;
use nom::bytes::complete::take_until;
use nom::character::complete::line_ending;
use nom::character::complete::multispace0;
use nom::character::complete::newline;
use nom::character::complete::space0;
use nom::combinator::eof;
use nom::combinator::opt;
use nom::combinator::rest;
use nom::multi::many1;
use nom::multi::many_till;
use nom::sequence::preceded;
use nom::sequence::terminated;
use nom::sequence::tuple;
use nom::IResult;
use nom::Parser;

/// Runs `get_paragraphs` on a hard-coded three-paragraph sample and
/// debug-prints the result. Panics (via `unwrap`) if parsing returns an error.
fn main() {
    dbg!(get_paragraphs("asdf asdf\n\nwer\n\nwer").unwrap());
}

/// Parses one or more paragraphs, skipping any whitespace (including blank
/// lines) in front of each. Fails if no paragraph is present.
fn get_paragraphs(source: &str) -> IResult<&str, Vec<Vec<&str>>> {
    let one_paragraph = preceded(multispace0, paragraph);
    many1(one_paragraph)(source)
}

/// Parses a single paragraph as one or more lines of text.
///
/// Each line may be preceded by at most one line ending, so a blank line ends
/// the paragraph.
fn paragraph(source: &str) -> IResult<&str, Vec<&str>> {
    let (source, lines) = many1(preceded(opt(line_ending), text_content))(source)?;

    // Earlier approach, kept for reference:
    // let (source, p) = alt((
    //     terminated(take_until("\n"), tuple((newline, space0, newline))).map(|s: &str| s.trim()),
    //     rest,
    // ))(source)?;

    Ok((source, lines))
}

/// Returns the text up to (not including) the next newline. Fails when that
/// text would be empty.
fn text_content(source: &str) -> IResult<&str, &str> {
    let until_newline = is_not("\n");
    until_newline(source)
}