AnswerDotAI · lukastk · Dec 1, 2024 · Dec 1, 2024 · Dec 5, 2024 · Feb 22, 2025
diff --git a/nbdev/_modidx.py b/nbdev/_modidx.py
@@ -160,6 +160,7 @@
                                'nbdev.migrate.nbdev_migrate': ('api/migrate.html#nbdev_migrate', 'nbdev/migrate.py')},
             'nbdev.process': { 'nbdev.process.NBProcessor': ('api/process.html#nbprocessor', 'nbdev/process.py'),
                                'nbdev.process.NBProcessor.__init__': ('api/process.html#nbprocessor.__init__', 'nbdev/process.py'),
+                               'nbdev.process.NBProcessor._handle_nb': ('api/process.html#nbprocessor._handle_nb', 'nbdev/process.py'),
                                'nbdev.process.NBProcessor._proc': ('api/process.html#nbprocessor._proc', 'nbdev/process.py'),
                                'nbdev.process.NBProcessor._process_cell': ( 'api/process.html#nbprocessor._process_cell',
                                                                             'nbdev/process.py'),

diff --git a/nbdev/doclinks.py b/nbdev/doclinks.py
@@ -10,6 +10,7 @@
 from .maker import *
 from .export import *
 from .imports import *
+from .process import plaintext_file_formats
 
 from fastcore.script import *
 from fastcore.utils import *
@@ -120,15 +121,18 @@ def nbglob(path=None, skip_folder_re = '^[_.]', file_glob='*.ipynb', skip_file_r
     "Find all files in a directory matching an extension given a config key."
     path = Path(path or get_config()[key])
     recursive=get_config().recursive
-    res = globtastic(path, file_glob=file_glob, skip_folder_re=skip_folder_re,
-                     skip_file_re=skip_file_re, recursive=recursive, **kwargs)
+    if not isinstance(file_glob, (list, tuple)): file_glob = [file_glob]
+    # Accumulate into an `L` rather than a plain list: the `return` below calls `res.map`, which `list` lacks
+    res = L()
+    for _file_glob in file_glob:
+        res += globtastic(path, file_glob=_file_glob, skip_folder_re=skip_folder_re, skip_file_re=skip_file_re, recursive=recursive, **kwargs)
     return res.map(Path) if as_path else res
 
 # %% ../nbs/api/05_doclinks.ipynb
 def nbglob_cli(
     path:str=None, # Path to notebooks
     symlinks:bool=False, # Follow symlinks?
-    file_glob:str='*.ipynb', # Only include files matching glob
+    file_glob:Union[str, List[str]]=['*.ipynb'] + [f"*.{ext}" for ext in plaintext_file_formats], # Only include files matching glob
     file_re:str=None, # Only include files matching regex
     folder_re:str=None, # Only enter folders matching regex
     skip_file_glob:str=None, # Skip files matching glob
@@ -142,7 +146,7 @@ def nbglob_cli(
 @call_parse
 @delegates(nbglob_cli)
 def nbdev_export(
-    path:str=None, # Path or filename
+    path:str=None, # Path or filename
     procs:Param("tokens naming the export processors to use.", nargs="*", choices=optional_procs())="black_format",
     **kwargs):
     "Export notebooks in `path` to Python modules"

diff --git a/nbdev/export.py b/nbdev/export.py
@@ -73,12 +73,13 @@ def nb_export(nbname:str,        # Filename of notebook
               name:str=None,     # Name of python script {name}.py to create.
               mod_maker=ModuleMaker,
               debug:bool=False,  # Debug mode
+              fmt:str=None,      # Format of the input file (e.g. "py:percent"); inferred from the file extension if None
               solo_nb:bool=False # Export single notebook outside of an nbdev project.
              ):
     "Create module(s) from notebook"
     if lib_path is None: lib_path = get_config().lib_path if is_nbdev() else '.'
     exp = ExportModuleProc()
-    nb = NBProcessor(nbname, [exp]+L(procs), debug=debug)
+    nb = NBProcessor(nbname, [exp]+L(procs), debug=debug, fmt=fmt)
     nb.process()
     for mod,cells in exp.modules.items():
         if first(1 for o in cells if o.cell_type=='code'):

diff --git a/nbdev/process.py b/nbdev/process.py
@@ -16,6 +16,14 @@
 
 from collections import defaultdict
 
+try:
+    import nbformat
+    import jupytext
+    import tempfile
+    plaintext_supported = True
+except ImportError:
+    plaintext_supported = False
+
 # %% ../nbs/api/03_process.ipynb
 # from https://github.com/quarto-dev/quarto-cli/blob/main/src/resources/jupyter/notebook.py
 langs = defaultdict(
@@ -88,17 +96,43 @@ def _mk_procs(procs, nb): return L(procs).map(instantiate, nb=nb)
 # %% ../nbs/api/03_process.ipynb
 def _is_direc(f): return getattr(f, '__name__', '-')[-1]=='_'
 
+# %% ../nbs/api/03_process.ipynb
+plaintext_file_formats = {
+    "pct.py": "py:percent",
+    "lgt.py": "py:light",
+    "spx.py": "py:sphinx",
+    "myst.md": "md:myst",
+    "pandoc.md": "md:pandoc",
+}
+
 # %% ../nbs/api/03_process.ipynb
 class NBProcessor:
     "Process cells and nbdev comments in a notebook"
-    def __init__(self, path=None, procs=None, nb=None, debug=False, rm_directives=True, process=False):
-        self.nb = read_nb(path) if nb is None else nb
+    def __init__(self, path=None, procs=None, nb=None, debug=False, rm_directives=True, process=False, fmt=None):
+        self._handle_nb(path, nb, fmt)
         self.lang = nb_lang(self.nb)
         for cell in self.nb.cells: cell.directives_ = extract_directives(cell, remove=rm_directives, lang=self.lang)
         self.procs = _mk_procs(procs, nb=self.nb)
         self.debug,self.rm_directives = debug,rm_directives
         if process: self.process()
 
+    def _handle_nb(self, path, nb, fmt):
+        "Set `self.nb` to `nb` if given, else read `path` as `fmt` (inferred from the file suffix when `fmt` is None)"
+        path = str(path)
+        # Require the dot before the suffix: `test_spx.py` ends with `spx.py` but is not an `spx.py` notebook
+        if fmt is None: fmt = next((v for k,v in plaintext_file_formats.items() if path.endswith(f".{k}")), "ipynb")
+        if fmt != "ipynb" and fmt not in plaintext_file_formats.values(): raise ValueError(f"Invalid format: {fmt}")
+        if nb is not None: self.nb = nb  # A supplied notebook is used as-is; `path` is never read
+        elif fmt == "ipynb": self.nb = read_nb(path)
+        elif not plaintext_supported:
+            raise ValueError(f"File {path} has a supported extension, but plaintext conversion is not supported. Please install jupytext and nbformat to use this feature.")
+        else:
+            # Convert through a real .ipynb so `read_nb` does the loading. Use a temp directory: a still-open
+            # `NamedTemporaryFile` cannot be reopened by name on Windows
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                nbformat.write(jupytext.read(path, fmt=fmt), f"{tmp_dir}/nb.ipynb")
+                self.nb = read_nb(f"{tmp_dir}/nb.ipynb")
+
     def _process_cell(self, proc, cell):
         if not hasattr(cell,'source'): return
         if cell.cell_type=='code' and cell.directives_:

diff --git a/nbs/api/03_process.ipynb b/nbs/api/03_process.ipynb
@@ -37,7 +37,15 @@
     "from fastcore.script import *\n",
     "from fastcore.imports import *\n",
     "\n",
-    "from collections import defaultdict"
+    "from collections import defaultdict\n",
+    "\n",
+    "try:\n",
+    "    import nbformat\n",
+    "    import jupytext\n",
+    "    import tempfile\n",
+    "    plaintext_supported = True\n",
+    "except ImportError:\n",
+    "    plaintext_supported = False"
    ]
   },
   {
@@ -353,6 +361,23 @@
     "def _is_direc(f): return getattr(f, '__name__', '-')[-1]=='_'"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16dc34e7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#|exporti\n",
+    "plaintext_file_formats = {\n",
+    "    \"pct.py\": \"py:percent\",\n",
+    "    \"lgt.py\": \"py:light\",\n",
+    "    \"spx.py\": \"py:sphinx\",\n",
+    "    \"myst.md\": \"md:myst\",\n",
+    "    \"pandoc.md\": \"md:pandoc\",\n",
+    "}"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -363,14 +388,31 @@
     "#|export\n",
     "class NBProcessor:\n",
     "    \"Process cells and nbdev comments in a notebook\"\n",
-    "    def __init__(self, path=None, procs=None, nb=None, debug=False, rm_directives=True, process=False):\n",
-    "        self.nb = read_nb(path) if nb is None else nb\n",
+    "    def __init__(self, path=None, procs=None, nb=None, debug=False, rm_directives=True, process=False, fmt=None):\n",
+    "        self._handle_nb(path, nb, fmt)\n",
     "        self.lang = nb_lang(self.nb)\n",
     "        for cell in self.nb.cells: cell.directives_ = extract_directives(cell, remove=rm_directives, lang=self.lang)\n",
     "        self.procs = _mk_procs(procs, nb=self.nb)\n",
     "        self.debug,self.rm_directives = debug,rm_directives\n",
     "        if process: self.process()\n",
     "\n",
+    "    def _handle_nb(self, path, nb, fmt):\n",
+    "        \"Set `self.nb` to `nb` if given, else read `path` as `fmt` (inferred from the file suffix when `fmt` is None)\"\n",
+    "        path = str(path)\n",
+    "        # Require the dot before the suffix: `test_spx.py` ends with `spx.py` but is not an `spx.py` notebook\n",
+    "        if fmt is None: fmt = next((v for k,v in plaintext_file_formats.items() if path.endswith(f\".{k}\")), \"ipynb\")\n",
+    "        if fmt != \"ipynb\" and fmt not in plaintext_file_formats.values(): raise ValueError(f\"Invalid format: {fmt}\")\n",
+    "        if nb is not None: self.nb = nb  # A supplied notebook is used as-is; `path` is never read\n",
+    "        elif fmt == \"ipynb\": self.nb = read_nb(path)\n",
+    "        elif not plaintext_supported:\n",
+    "            raise ValueError(f\"File {path} has a supported extension, but plaintext conversion is not supported. Please install jupytext and nbformat to use this feature.\")\n",
+    "        else:\n",
+    "            # Convert through a real .ipynb so `read_nb` does the loading. Use a temp directory: a still-open\n",
+    "            # `NamedTemporaryFile` cannot be reopened by name on Windows\n",
+    "            with tempfile.TemporaryDirectory() as tmp_dir:\n",
+    "                nbformat.write(jupytext.read(path, fmt=fmt), f\"{tmp_dir}/nb.ipynb\")\n",
+    "                self.nb = read_nb(f\"{tmp_dir}/nb.ipynb\")\n",
+    "\n",
     "    def _process_cell(self, proc, cell):\n",
     "        if not hasattr(cell,'source'): return\n",
     "        if cell.cell_type=='code' and cell.directives_:\n",

diff --git a/nbs/api/04_export.ipynb b/nbs/api/04_export.ipynb
@@ -244,12 +244,13 @@
     "              name:str=None,     # Name of python script {name}.py to create.\n",
     "              mod_maker=ModuleMaker,\n",
     "              debug:bool=False,  # Debug mode\n",
+    "              fmt:str=None,      # Format of the input file (e.g. \"py:percent\"); inferred from the file extension if None\n",
     "              solo_nb:bool=False # Export single notebook outside of an nbdev project.\n",
     "             ):\n",
     "    \"Create module(s) from notebook\"\n",
     "    if lib_path is None: lib_path = get_config().lib_path if is_nbdev() else '.'\n",
     "    exp = ExportModuleProc()\n",
-    "    nb = NBProcessor(nbname, [exp]+L(procs), debug=debug)\n",
+    "    nb = NBProcessor(nbname, [exp]+L(procs), debug=debug, fmt=fmt)\n",
     "    nb.process()\n",
     "    for mod,cells in exp.modules.items():\n",
     "        if first(1 for o in cells if o.cell_type=='code'):\n",

diff --git a/nbs/api/05_doclinks.ipynb b/nbs/api/05_doclinks.ipynb
@@ -29,6 +29,7 @@
     "from nbdev.maker import *\n",
     "from nbdev.export import *\n",
     "from nbdev.imports import *\n",
+    "from nbdev.process import plaintext_file_formats\n",
     "\n",
     "from fastcore.script import *\n",
     "from fastcore.utils import *\n",
@@ -325,8 +326,11 @@
     "    \"Find all files in a directory matching an extension given a config key.\"\n",
     "    path = Path(path or get_config()[key])\n",
     "    recursive=get_config().recursive\n",
-    "    res = globtastic(path, file_glob=file_glob, skip_folder_re=skip_folder_re,\n",
-    "                     skip_file_re=skip_file_re, recursive=recursive, **kwargs)\n",
+    "    if not isinstance(file_glob, (list, tuple)): file_glob = [file_glob]\n",
+    "    # Accumulate into an `L` rather than a plain list: the `return` below calls `res.map`, which `list` lacks\n",
+    "    res = L()\n",
+    "    for _file_glob in file_glob:\n",
+    "        res += globtastic(path, file_glob=_file_glob, skip_folder_re=skip_folder_re, skip_file_re=skip_file_re, recursive=recursive, **kwargs)\n",
     "    return res.map(Path) if as_path else res"
    ]
   },
@@ -340,7 +344,7 @@
     "def nbglob_cli(\n",
     "    path:str=None, # Path to notebooks\n",
     "    symlinks:bool=False, # Follow symlinks?\n",
-    "    file_glob:str='*.ipynb', # Only include files matching glob\n",
+    "    file_glob:Union[str, List[str]]=['*.ipynb'] + [f\"*.{ext}\" for ext in plaintext_file_formats], # Only include files matching glob\n",
     "    file_re:str=None, # Only include files matching regex\n",
     "    folder_re:str=None, # Only enter folders matching regex\n",
     "    skip_file_glob:str=None, # Skip files matching glob\n",
@@ -361,7 +365,7 @@
     "@call_parse\n",
     "@delegates(nbglob_cli)\n",
     "def nbdev_export(\n",
-    "    path:str=None, # Path or filename\n",
+    "    path:str=None, # Path or filename\n",
     "    procs:Param(\"tokens naming the export processors to use.\", nargs=\"*\", choices=optional_procs())=\"black_format\",\n",
     "    **kwargs):\n",
     "    \"Export notebooks in `path` to Python modules\"\n",
@@ -874,7 +878,19 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'NoneType' object has no attribute 'startswith'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[30], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[43mc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdoc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnumpy.array\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstartswith\u001b[49m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttp\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m c\u001b[38;5;241m.\u001b[39mdoc(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mNbdevLookup\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mendswith(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m#nbdevlookup\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m c\u001b[38;5;241m.\u001b[39mdoc(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124marray\u001b[39m\u001b[38;5;124m'\u001b[39m)\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'NoneType' object has no attribute 'startswith'"
+     ]
+    }
+   ],
    "source": [
     "assert c.doc('numpy.array').startswith('http')\n",
     "assert not c.doc('numpy.Array')\n",

diff --git a/nbs/tutorials/tutorial.ipynb b/nbs/tutorials/tutorial.ipynb
@@ -288,7 +288,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Since we created a repo named `nbev-hello-world` with the `fastai` user, we can clone it as follows:\n",
+    "Since we created a repo named `nbdev-hello-world` with the `fastai` user, we can clone it as follows:\n",
     "\n",
     "```sh\n",
     "git clone https://github.com/fastai/nbdev-hello-world.git\n",