Initial commit
This commit is contained in:
5
.gitignore
vendored
5
.gitignore
vendored
@@ -16,3 +16,8 @@ target/
|
|||||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
|
|
||||||
|
# Added by cargo
|
||||||
|
|
||||||
|
/target
|
||||||
|
|||||||
10
.idea/.gitignore
generated
vendored
Normal file
10
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
# Ignored default folder with query files
|
||||||
|
/queries/
|
||||||
|
# Datasource local storage ignored files
|
||||||
|
/dataSources/
|
||||||
|
/dataSources.local.xml
|
||||||
|
# Editor-based HTTP Client requests
|
||||||
|
/httpRequests/
|
||||||
6
.idea/misc.xml
generated
Normal file
6
.idea/misc.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2">
|
||||||
|
<output url="file://$PROJECT_DIR$/out" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/Brig.iml" filepath="$PROJECT_DIR$/Brig.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
13
Brig.iml
Normal file
13
Brig.iml
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="WEB_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||||
|
<exclude-output />
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/tests" isTestSource="true" />
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
1641
Cargo.lock
generated
Normal file
1641
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
11
Cargo.toml
Normal file
11
Cargo.toml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
[package]
|
||||||
|
name = "Brig"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
mediawiki_rest_api = "0.2.1"
|
||||||
|
parse_wiki_text = "0.1.5"
|
||||||
|
tokio = { version = "1.48.0", features = ["rt", "rt-multi-thread", "macros"] }
|
||||||
|
|
||||||
|
|
||||||
36
GEMINI.md
Normal file
36
GEMINI.md
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
|
||||||
|
## Critical Rules
|
||||||
|
|
||||||
|
### 1. Code Organization
|
||||||
|
|
||||||
|
- Many small files over few large files
|
||||||
|
- High cohesion, low coupling
|
||||||
|
- 200-400 lines typical, 800 max per file
|
||||||
|
- Organize by feature/domain, not by type
|
||||||
|
|
||||||
|
### 2. Code Style
|
||||||
|
|
||||||
|
- No emojis in code, comments, or documentation
|
||||||
|
- Immutability always - never mutate objects or arrays
|
||||||
|
- No console.log in production code
|
||||||
|
- Proper error handling with try/catch
|
||||||
|
- Input validation with Zod or similar
|
||||||
|
- Always follow existing coding style when adding new code
|
||||||
|
- Avoid nested matching pattern as much as possible
|
||||||
|
|
||||||
|
### 3. Regarding Dependencies:
|
||||||
|
- Avoid introducing new external dependencies unless absolutely necessary.
|
||||||
|
- If a new dependency is required, state the reason.
|
||||||
|
|
||||||
|
### 4. Adding tests
|
||||||
|
When adding tests for a particular feature, add the tests near where other tests
|
||||||
|
for similar code live. Try not to add new dependencies as you add tests, and try
|
||||||
|
to make new tests similar in style and API usage to other tests which already
|
||||||
|
exist nearby.
|
||||||
|
|
||||||
|
### 5. AI/Developer Rules
|
||||||
|
- **Rust Development Verification:** After modifying any Rust code (`.rs` files or `Cargo.toml`), you **MUST** run `cargo build` from the repository root (where `Cargo.toml` lives) to verify compilation.
|
||||||
|
- If the build fails, you **MUST** analyze the error log and apply a fix immediately.
|
||||||
|
- Do **NOT** report the task as complete until the code compiles successfully.
|
||||||
|
|
||||||
|
|
||||||
34
spec.md
Normal file
34
spec.md
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
# Spec: Wikimedia article manipulation library
|
||||||
|
|
||||||
|
**Goal**: Create a rust library providing high level and low level functions manipulating the properties
|
||||||
|
and the content of wikimedia pages. This library must be built on top of the 2 existing rust libraries
|
||||||
|
mediawiki_rest_api and parse_wiki_text to implement those functionalities.
|
||||||
|
|
||||||
|
**User Story 1**: As a user, I want to get the raw text content of a wikimedia article by specifying the site, language and article name.
|
||||||
|
**User Story 2**: As a user, I want to get the list of all the links of an article
|
||||||
|
**User Story 3**: As a user, I want to get the list of all the categories of an article
|
||||||
|
**User Story 4**: As a user, I want to get the list of all the references of an article
|
||||||
|
**User Story 5**: As a user, I want to get the list of all the templates of an article
|
||||||
|
**User Story 6**: As a user, I want to get the list of all the parameters of a template
|
||||||
|
|
||||||
|
**Functional Requirements**:
|
||||||
|
1. Use mediawiki_rest_api to get access to the wikimedia article
|
||||||
|
2. Use mediawiki_rest_api to get the initial content of the article
|
||||||
|
3. Use parse_wiki_text to parse the wikimedia article
|
||||||
|
4. Use parse_wiki_text to interact with any internal content of the article
|
||||||
|
5. Use parse_wiki_text nodes
|
||||||
|
6. Always write unit tests for every function
|
||||||
|
|
||||||
|
**Technical Constraints**:
|
||||||
|
- rust language
|
||||||
|
- mediawiki_rest_api
|
||||||
|
- parse_wiki_text
|
||||||
|
|
||||||
|
**Test Cases**:
|
||||||
|
- **Scenario A**: Valid access -> Status 200 + content of the article.
|
||||||
|
- **Scenario B**: Valid access -> Status 200 + categories of the article.
|
||||||
|
- **Scenario C**: Valid access -> Status 200 + links of the article.
|
||||||
|
- **Scenario D**: Valid access -> Status 200 + references of the article.
|
||||||
|
- **Scenario E**: Valid access -> Status 200 + templates of the article.
|
||||||
|
- **Scenario F**: Valid access -> Status 200 + parameters of a template.
|
||||||
|
- **Scenario Z**: Incorrect article -> Status 401 + error message.
|
||||||
311
src/lib.rs
Normal file
311
src/lib.rs
Normal file
@@ -0,0 +1,311 @@
|
|||||||
|
use mediawiki_rest_api::prelude::*;
|
||||||
|
use mediawiki_rest_api::rest_api_builder::RestApiBuilder;
|
||||||
|
use parse_wiki_text::{Configuration, Node, Output};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// Fetches the raw wikitext of a Wikipedia article.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `language` - The language code (e.g., "en", "fr").
|
||||||
|
/// * `title` - The title of the article.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// The raw wikitext content of the article.
|
||||||
|
pub async fn fetch_article(language: &str, title: &str) -> Result<String, String> {
|
||||||
|
let api = RestApiBuilder::wikipedia(language).build();
|
||||||
|
let page = Page::new(title);
|
||||||
|
match page.get(&api, false).await {
|
||||||
|
Ok((_, wikitext)) => Ok(wikitext),
|
||||||
|
Err(e) => Err(format!("Failed to fetch article: {}", e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the wikitext into a structured format.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `text` - The raw wikitext.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// The parsed output containing nodes.
|
||||||
|
pub fn parse(text: &str) -> Output {
|
||||||
|
let config = Configuration::default();
|
||||||
|
config.parse(text)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts all interwiki links from the parsed nodes.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `nodes` - The list of nodes to traverse.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// A list of link targets (e.g., "Page Title").
|
||||||
|
pub fn get_links(nodes: &[Node]) -> Vec<String> {
|
||||||
|
let mut links = Vec::new();
|
||||||
|
visit_nodes(nodes, &mut |node| {
|
||||||
|
if let Node::Link { target, .. } = node {
|
||||||
|
links.push(target.to_string());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
links
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts all categories from the parsed nodes.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `nodes` - The list of nodes to traverse.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// A list of category targets (e.g., "Category:Rust").
|
||||||
|
pub fn get_categories(nodes: &[Node]) -> Vec<String> {
|
||||||
|
let mut categories = Vec::new();
|
||||||
|
visit_nodes(nodes, &mut |node| {
|
||||||
|
if let Node::Category { target, .. } = node {
|
||||||
|
categories.push(target.to_string());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
categories
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts all references from the parsed nodes.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `nodes` - The list of nodes to traverse.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// A list of reference contents (e.g., the text inside <ref> tags).
|
||||||
|
pub fn get_references(nodes: &[Node]) -> Vec<String> {
|
||||||
|
let mut references = Vec::new();
|
||||||
|
visit_nodes(nodes, &mut |node| {
|
||||||
|
if let Node::Tag { name, nodes, .. } = node {
|
||||||
|
if name == "ref" {
|
||||||
|
let content = get_text_from_nodes(nodes);
|
||||||
|
if !content.is_empty() {
|
||||||
|
references.push(content);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
references
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts all templates from the parsed nodes.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `nodes` - The list of nodes to traverse.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// A list of template names (e.g., "Infobox").
|
||||||
|
pub fn get_templates(nodes: &[Node]) -> Vec<String> {
|
||||||
|
let mut templates = Vec::new();
|
||||||
|
visit_nodes(nodes, &mut |node| {
|
||||||
|
if let Node::Template { name, .. } = node {
|
||||||
|
let template_name = get_text_from_nodes(name).trim().to_string();
|
||||||
|
if !template_name.is_empty() {
|
||||||
|
templates.push(template_name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
templates
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts parameters from a specific template.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
///
|
||||||
|
/// * `nodes` - The list of nodes to traverse.
|
||||||
|
/// * `template_name` - The name of the template to find.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// A map of parameter names to their values.
|
||||||
|
pub fn get_template_parameters(nodes: &[Node], template_name: &str) -> HashMap<String, String> {
|
||||||
|
let mut parameters = HashMap::new();
|
||||||
|
visit_nodes(nodes, &mut |node| {
|
||||||
|
if let Node::Template { name, parameters: params, .. } = node {
|
||||||
|
let current_template_name = get_text_from_nodes(name).trim().to_string();
|
||||||
|
if current_template_name.eq_ignore_ascii_case(template_name) {
|
||||||
|
for param in params {
|
||||||
|
let key = if let Some(name) = ¶m.name {
|
||||||
|
get_text_from_nodes(name).trim().to_string()
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
let value = get_text_from_nodes(¶m.value).trim().to_string();
|
||||||
|
parameters.insert(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
parameters
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_nodes<F>(nodes: &[Node], callback: &mut F)
|
||||||
|
where
|
||||||
|
F: FnMut(&Node),
|
||||||
|
{
|
||||||
|
for node in nodes {
|
||||||
|
callback(node);
|
||||||
|
match node {
|
||||||
|
Node::Heading { nodes, .. } => visit_nodes(nodes, callback),
|
||||||
|
Node::Link { text, .. } => visit_nodes(text, callback),
|
||||||
|
Node::ExternalLink { nodes, .. } => visit_nodes(nodes, callback),
|
||||||
|
Node::Preformatted { nodes, .. } => visit_nodes(nodes, callback),
|
||||||
|
Node::Tag { nodes, .. } => visit_nodes(nodes, callback),
|
||||||
|
Node::Image { text, .. } => visit_nodes(text, callback),
|
||||||
|
Node::UnorderedList { items, .. } => {
|
||||||
|
for item in items {
|
||||||
|
visit_nodes(&item.nodes, callback);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::OrderedList { items, .. } => {
|
||||||
|
for item in items {
|
||||||
|
visit_nodes(&item.nodes, callback);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::DefinitionList { items, .. } => {
|
||||||
|
for item in items {
|
||||||
|
visit_nodes(&item.nodes, callback);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Template { name, parameters, .. } => {
|
||||||
|
visit_nodes(name, callback);
|
||||||
|
for param in parameters {
|
||||||
|
if let Some(name) = ¶m.name {
|
||||||
|
visit_nodes(name, callback);
|
||||||
|
}
|
||||||
|
visit_nodes(¶m.value, callback);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Parameter { name, default, .. } => {
|
||||||
|
visit_nodes(name, callback);
|
||||||
|
if let Some(default_val) = default {
|
||||||
|
visit_nodes(default_val, callback);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Table { rows, captions, .. } => {
|
||||||
|
for caption in captions {
|
||||||
|
visit_nodes(&caption.content, callback);
|
||||||
|
}
|
||||||
|
for row in rows {
|
||||||
|
for cell in &row.cells {
|
||||||
|
visit_nodes(&cell.content, callback);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Category { ordinal, .. } => {
|
||||||
|
visit_nodes(ordinal, callback);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_text_from_nodes(nodes: &[Node]) -> String {
|
||||||
|
let mut text = String::new();
|
||||||
|
for node in nodes {
|
||||||
|
match node {
|
||||||
|
Node::Text { value, .. } => text.push_str(value),
|
||||||
|
Node::Link { text: link_text, .. } => text.push_str(&get_text_from_nodes(link_text)),
|
||||||
|
Node::ExternalLink { nodes, .. } => text.push_str(&get_text_from_nodes(nodes)),
|
||||||
|
Node::Bold { .. } => {},
|
||||||
|
Node::Italic { .. } => {},
|
||||||
|
Node::BoldItalic { .. } => {},
|
||||||
|
Node::CharacterEntity { character, .. } => text.push(*character),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
text
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Fetching a well-known article must succeed and return content.
    #[tokio::test]
    async fn test_fetch_article_valid() {
        let result = fetch_article("en", "Rust (programming language)").await;
        assert!(result.is_ok());
        assert!(!result.unwrap().is_empty());
    }

    // Fetching a page that does not exist must report an error.
    #[tokio::test]
    async fn test_fetch_article_invalid() {
        let result = fetch_article("en", "ThisPageDoesNotExist_12345_XYZ").await;
        assert!(result.is_err());
    }

    // Parsing a heading produces at least one node.
    #[test]
    fn test_parse() {
        let output = parse("== Heading ==");
        assert!(!output.nodes.is_empty());
    }

    // Plain and piped links both report their target.
    #[test]
    fn test_get_links() {
        let output = parse("Here is a [[Link]] and another [[Another Link|with text]].");
        let links = get_links(&output.nodes);
        assert_eq!(links, ["Link", "Another Link"]);
    }

    // Links inside list items are found as well.
    #[test]
    fn test_get_links_nested() {
        let output = parse("* [[List Link]]\n* Item with [[Nested Link]]");
        let links = get_links(&output.nodes);
        assert!(links.iter().any(|link| link == "List Link"));
        assert!(links.iter().any(|link| link == "Nested Link"));
    }

    // Category targets keep their "Category:" prefix.
    #[test]
    fn test_get_categories() {
        let output = parse("Some text. [[Category:Programming languages]] [[Category:Rust]]");
        let categories = get_categories(&output.nodes);
        assert!(categories.iter().any(|c| c == "Category:Programming languages"));
        assert!(categories.iter().any(|c| c == "Category:Rust"));
    }

    // Each <ref> tag yields its inner text.
    #[test]
    fn test_get_references() {
        let output =
            parse("Some statement.<ref>Source 1</ref> Another statement.<ref>Source 2</ref>");
        let references = get_references(&output.nodes);
        assert_eq!(references.len(), 2);
        assert!(references.iter().any(|r| r == "Source 1"));
        assert!(references.iter().any(|r| r == "Source 2"));
    }

    // Template names are reported trimmed.
    #[test]
    fn test_get_templates() {
        let output =
            parse("{{Infobox person\n| name = Example\n}}\nSome text. {{Another template}}");
        let templates = get_templates(&output.nodes);
        assert_eq!(templates.len(), 2);
        assert!(templates.iter().any(|t| t == "Infobox person"));
        assert!(templates.iter().any(|t| t == "Another template"));
    }

    // Named parameters are returned with trimmed keys and values.
    #[test]
    fn test_get_template_parameters() {
        let output = parse("{{Infobox person\n| name = Example Name\n| age = 30\n}}");
        let params = get_template_parameters(&output.nodes, "Infobox person");
        assert_eq!(params.get("name").map(String::as_str), Some("Example Name"));
        assert_eq!(params.get("age").map(String::as_str), Some("30"));
    }
}
|
||||||
181
src/main.rs
Normal file
181
src/main.rs
Normal file
@@ -0,0 +1,181 @@
|
|||||||
|
use Brig::{fetch_article, parse, get_links, get_categories, get_references, get_templates, get_template_parameters};
|
||||||
|
use parse_wiki_text::Node;
|
||||||
|
|
||||||
|
#[tokio::main]
|
||||||
|
async fn main() {
|
||||||
|
// Example usage of the library
|
||||||
|
let language = "en";
|
||||||
|
let title = "Rust (programming language)";
|
||||||
|
|
||||||
|
println!("Fetching article '{}' from {} Wikipedia...", title, language);
|
||||||
|
|
||||||
|
match fetch_article(language, title).await {
|
||||||
|
Ok(wikitext) => {
|
||||||
|
println!("Successfully fetched article. Length: {}", wikitext.len());
|
||||||
|
|
||||||
|
let output = parse(&wikitext);
|
||||||
|
println!("Parsed article. Nodes: {}", output.nodes.len());
|
||||||
|
|
||||||
|
let links = get_links(&output.nodes);
|
||||||
|
println!("Found {} links:", links.len());
|
||||||
|
for (i, link) in links.iter().take(10).enumerate() {
|
||||||
|
println!(" {}. {}", i + 1, link);
|
||||||
|
}
|
||||||
|
if links.len() > 10 {
|
||||||
|
println!(" ... and {} more", links.len() - 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
let categories = get_categories(&output.nodes);
|
||||||
|
println!("Found {} categories:", categories.len());
|
||||||
|
for (i, category) in categories.iter().enumerate() {
|
||||||
|
println!(" {}. {}", i + 1, category);
|
||||||
|
}
|
||||||
|
|
||||||
|
let references = get_references(&output.nodes);
|
||||||
|
println!("Found {} references:", references.len());
|
||||||
|
for (i, reference) in references.iter().take(5).enumerate() {
|
||||||
|
println!(" {}. {}", i + 1, reference);
|
||||||
|
}
|
||||||
|
if references.len() > 5 {
|
||||||
|
println!(" ... and {} more", references.len() - 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
let templates = get_templates(&output.nodes);
|
||||||
|
println!("Found {} templates:", templates.len());
|
||||||
|
for (i, template) in templates.iter().take(10).enumerate() {
|
||||||
|
println!(" {}. {}", i + 1, template);
|
||||||
|
}
|
||||||
|
if templates.len() > 10 {
|
||||||
|
println!(" ... and {} more", templates.len() - 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
let template_name_to_find = "Infobox programming language";
|
||||||
|
let template_params = get_template_parameters(&output.nodes, template_name_to_find);
|
||||||
|
println!("Found {} parameters for template '{}':", template_params.len(), template_name_to_find);
|
||||||
|
for (key, value) in template_params.iter().take(50) {
|
||||||
|
println!(" - {}: {}", key, value);
|
||||||
|
}
|
||||||
|
if template_params.len() > 50 {
|
||||||
|
println!(" ... and {} more", template_params.len() - 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Visit and print nodes (demonstration)
|
||||||
|
// visit(&output.nodes, 0);
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
eprintln!("Error: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit(nodes: &[Node], depth: usize) {
|
||||||
|
for node in nodes {
|
||||||
|
let indent = " ".repeat(depth * 2);
|
||||||
|
match node {
|
||||||
|
Node::Heading { level, nodes, .. } => {
|
||||||
|
println!("{}Heading (level {})", indent, level);
|
||||||
|
visit(nodes, depth + 1);
|
||||||
|
}
|
||||||
|
Node::Text { value, .. } => {
|
||||||
|
println!("{}Text: {:?}", indent, value);
|
||||||
|
}
|
||||||
|
Node::Link { target, text, .. } => {
|
||||||
|
println!("{}Link: {}", indent, target);
|
||||||
|
visit(text, depth + 1);
|
||||||
|
}
|
||||||
|
Node::ExternalLink { nodes, .. } => {
|
||||||
|
println!("{}ExternalLink", indent);
|
||||||
|
visit(nodes, depth + 1);
|
||||||
|
}
|
||||||
|
Node::UnorderedList { items, .. } => {
|
||||||
|
println!("{}UnorderedList", indent);
|
||||||
|
for item in items {
|
||||||
|
visit(&item.nodes, depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::OrderedList { items, .. } => {
|
||||||
|
println!("{}OrderedList", indent);
|
||||||
|
for item in items {
|
||||||
|
visit(&item.nodes, depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::DefinitionList { items, .. } => {
|
||||||
|
println!("{}DefinitionList", indent);
|
||||||
|
for item in items {
|
||||||
|
visit(&item.nodes, depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Image { target, text, .. } => {
|
||||||
|
println!("{}Image: {}", indent, target);
|
||||||
|
visit(text, depth + 1);
|
||||||
|
}
|
||||||
|
Node::Template { name, parameters, .. } => {
|
||||||
|
println!("{}Template", indent);
|
||||||
|
println!("{} Name:", indent);
|
||||||
|
visit(name, depth + 2);
|
||||||
|
for param in parameters {
|
||||||
|
println!("{} Param", indent);
|
||||||
|
if let Some(name) = ¶m.name {
|
||||||
|
println!("{} Key:", indent);
|
||||||
|
visit(name, depth + 3);
|
||||||
|
}
|
||||||
|
println!("{} Value:", indent);
|
||||||
|
visit(¶m.value, depth + 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Parameter { name, default, .. } => {
|
||||||
|
println!("{}Parameter Usage", indent);
|
||||||
|
println!("{} Name:", indent);
|
||||||
|
visit(name, depth + 2);
|
||||||
|
if let Some(default_val) = default {
|
||||||
|
println!("{} Default:", indent);
|
||||||
|
visit(default_val, depth + 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Tag { name, nodes, .. } => {
|
||||||
|
println!("{}Tag: <{}>", indent, name);
|
||||||
|
visit(nodes, depth + 1);
|
||||||
|
}
|
||||||
|
Node::StartTag { name, .. } => {
|
||||||
|
println!("{}StartTag: <{}>", indent, name);
|
||||||
|
}
|
||||||
|
Node::EndTag { name, .. } => {
|
||||||
|
println!("{}EndTag: </{}>", indent, name);
|
||||||
|
}
|
||||||
|
Node::Preformatted { nodes, .. } => {
|
||||||
|
println!("{}Preformatted", indent);
|
||||||
|
visit(nodes, depth + 1);
|
||||||
|
}
|
||||||
|
Node::Table { rows, captions, .. } => {
|
||||||
|
println!("{}Table", indent);
|
||||||
|
for caption in captions {
|
||||||
|
println!("{} Caption", indent);
|
||||||
|
visit(&caption.content, depth + 2);
|
||||||
|
}
|
||||||
|
for row in rows {
|
||||||
|
println!("{} Row", indent);
|
||||||
|
for cell in &row.cells {
|
||||||
|
println!("{} Cell", indent);
|
||||||
|
visit(&cell.content, depth + 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Node::Bold { .. } => println!("{}Bold", indent),
|
||||||
|
Node::Italic { .. } => println!("{}Italic", indent),
|
||||||
|
Node::BoldItalic { .. } => println!("{}BoldItalic", indent),
|
||||||
|
Node::ParagraphBreak { .. } => println!("{}ParagraphBreak", indent),
|
||||||
|
Node::HorizontalDivider { .. } => println!("{}HorizontalDivider", indent),
|
||||||
|
Node::Category { target, ordinal, .. } => {
|
||||||
|
println!("{}Category: {}", indent, target);
|
||||||
|
visit(ordinal, depth + 1);
|
||||||
|
}
|
||||||
|
Node::Redirect { target, .. } => {
|
||||||
|
println!("{}Redirect: {}", indent, target);
|
||||||
|
}
|
||||||
|
Node::Comment { .. } => println!("{}Comment", indent),
|
||||||
|
Node::CharacterEntity { character, .. } => println!("{}CharacterEntity: {}", indent, character),
|
||||||
|
Node::MagicWord { .. } => println!("{}MagicWord", indent),
|
||||||
|
// _ => println!("{}Unknown Node: {:?}", indent, node),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
0
src/parser/mod.rs
Normal file
0
src/parser/mod.rs
Normal file
99
tests/tests.rs
Normal file
99
tests/tests.rs
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
use Brig::{fetch_article, parse, get_links, get_categories, get_references, get_templates, get_template_parameters};
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    // Shared fixture: downloads the English "Rust (programming language)"
    // article and fails the calling test if the download fails or is empty.
    async fn get_rust_article_content() -> String {
        let result = fetch_article("en", "Rust (programming language)").await;
        assert!(result.is_ok(), "Should successfully fetch a valid article");
        let content = result.unwrap();
        assert!(!content.is_empty(), "Content should not be empty");
        content
    }

    #[tokio::test]
    async fn test_fetch_article_valid() {
        // Scenario A: Valid access
        let content = get_rust_article_content().await;
        assert!(!content.is_empty());
    }

    #[tokio::test]
    async fn test_fetch_article_invalid() {
        // Scenario Z: Incorrect article
        let result = fetch_article("en", "ThisPageDoesNotExist_12345_XYZ").await;
        assert!(result.is_err(), "Should fail to fetch a non-existent article");
    }

    #[tokio::test]
    async fn test_parse_article() {
        let content = get_rust_article_content().await;
        let parsed = parse(&content);
        assert!(!parsed.nodes.is_empty(), "Should parse fetched content");
    }

    #[tokio::test]
    async fn test_get_links_integration() {
        // Scenario C: Valid access -> Status 200 + links of the article.
        let content = get_rust_article_content().await;
        let parsed = parse(&content);
        let links = get_links(&parsed.nodes);
        assert!(!links.is_empty(), "Should find links in the Rust article");
    }

    #[tokio::test]
    async fn test_get_categories_integration() {
        // Scenario B: Valid access -> Status 200 + categories of the article.
        let content = get_rust_article_content().await;
        let parsed = parse(&content);
        let categories = get_categories(&parsed.nodes);
        assert!(!categories.is_empty(), "Should find categories in the Rust article");
    }

    #[tokio::test]
    async fn test_get_references_integration() {
        // Scenario D: Valid access -> Status 200 + references of the article.
        let content = get_rust_article_content().await;
        let parsed = parse(&content);
        let references = get_references(&parsed.nodes);
        assert!(!references.is_empty(), "Should find references in the Rust article");
    }

    #[tokio::test]
    async fn test_get_templates_integration() {
        // Scenario E: Valid access -> Status 200 + templates of the article.
        let content = get_rust_article_content().await;
        let parsed = parse(&content);
        let templates = get_templates(&parsed.nodes);
        assert!(!templates.is_empty(), "Should find templates in the Rust article");
    }

    #[tokio::test]
    async fn test_get_template_parameters_integration() {
        // Scenario F: Valid access -> Status 200 + parameters of a template.
        let content = get_rust_article_content().await;
        let parsed = parse(&content);
        let templates = get_templates(&parsed.nodes);

        // "Infobox programming language" is likely present on the Rust page.
        let target_template = "Infobox programming language";
        let has_target = templates.iter().any(|name| name == target_template);

        if has_target {
            let params = get_template_parameters(&parsed.nodes, target_template);
            assert!(!params.is_empty(), "Should find parameters for Infobox programming language");
        } else {
            // Fallback: try to find *any* template that has parameters if the specific one isn't found
            // This makes the test more robust to content changes on Wikipedia
            let found_params = templates
                .iter()
                .any(|name| !get_template_parameters(&parsed.nodes, name).is_empty());
            assert!(found_params, "Should find parameters for at least one template");
        }
    }
}
|
||||||
Reference in New Issue
Block a user