Skip to content

Commit cdc8b74

Browse files
committed
feat: add support for other assets like pdf, videos, docs, etc.
1 parent 6fc4ee5 commit cdc8b74

File tree

4 files changed

+68
-47
lines changed

4 files changed

+68
-47
lines changed

src/index.js

+2-2
Original file line number | Diff line number | Diff line change
@@ -10,9 +10,9 @@
1010
* governing permissions and limitations under the License.
1111
*/
1212
import { createJcrPackage } from './package/packaging.js';
13-
import { getImageUrlsFromMarkdown } from './package/image-mapping.js';
13+
import { getAssetUrlsFromMarkdown } from './package/asset-mapping.js';
1414

1515
export {
1616
createJcrPackage,
17-
getImageUrlsFromMarkdown,
17+
getAssetUrlsFromMarkdown,
1818
};

src/package/image-mapping.js → src/package/asset-mapping.js

+31-19
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,9 @@ const imageRegex = /!\[([^\]]*)]\(([^) "]+)(?: *"([^"]*)")?\)|!\[([^\]]*)]\[([^\
1818
// Regex for reference definitions
1919
const referenceRegex = /\[([^\]]+)]:\s*(\S+)/g;
2020

21+
// Regex for non-image asset links (PDFs, videos, docs, etc.)
22+
const nonImageAssetRegex = /(?:\[(.*?)\]|\[.*?\])\(([^)]+\.(?:pdf|doc|docx|xls|xlsx|ppt|pptx|odt|ods|odp|rtf|txt|csv|mp4|mov|avi|wmv|mkv|flv|webm))\)|\[(.*?)\]:\s*(\S+\.(?:pdf|doc|docx|xls|xlsx|ppt|pptx|odt|ods|odp|rtf|txt|csv|mp4|mov|avi|wmv|mkv|flv|webm))/gi;
23+
2124
/**
2225
* Function to find reference definitions in a markdown file.
2326
*
@@ -36,62 +39,71 @@ const findReferenceDefinitionsInMarkdown = (markdownContent) => {
3639
};
3740

3841
/**
 * Function to scan for assets in a markdown file.
 *
 * Collects, in order: every image url matched by `imageRegex` (inline, or
 * reference-style resolved through the reference definitions), followed by
 * every url matched by `nonImageAssetRegex` (links and reference definitions
 * whose target ends in a known document/video extension).
 *
 * @param {string} markdownContent - The content of the markdown file
 * @returns {Array<string>} An array of asset urls, in the order they were found
 */
const findAssetsInMarkdown = (markdownContent) => {
  const references = findReferenceDefinitionsInMarkdown(markdownContent);

  const assetUrls = [];

  let match;

  // Identify each image url in the markdown content
  // eslint-disable-next-line no-cond-assign
  while ((match = imageRegex.exec(markdownContent)) !== null) {
    // Scoped to the iteration so that a match which yields no url can never
    // re-use (and re-push) the url of the previous match.
    let url;
    if (match[2]) { // Inline image
      // eslint-disable-next-line prefer-destructuring
      url = match[2];
    } else if (match[5]) { // Reference-style image
      url = references[match[5]] || null; // Resolve URL from reference map
    }
    if (url) {
      assetUrls.push(url);
    }
  }

  // Find and add the non-image asset links (selected by file extension)
  // eslint-disable-next-line no-cond-assign
  while ((match = nonImageAssetRegex.exec(markdownContent)) !== null) {
    // Group 2 is the url of an inline link; group 4 is the url of a reference
    // definition. Group 3 is only the reference label and must not be used.
    const url = match[2] || match[4];
    if (url) {
      assetUrls.push(url);
    }
  }

  return assetUrls;
};
6779

6880
/**
69-
* Get the list image urls present in the markdown.
81+
* Get the list of asset urls present in the markdown.
7082
* @param {string} markdownContent - The content of the markdown file
71-
* @returns {Array<string>} An array of image urls.
83+
* @returns {Array<string>} An array of asset urls.
7284
*/
73-
const getImageUrlsFromMarkdown = (markdownContent) => {
85+
const getAssetUrlsFromMarkdown = (markdownContent) => {
7486
try {
75-
return findImagesInMarkdown(markdownContent);
87+
return findAssetsInMarkdown(markdownContent);
7688
} catch (error) {
7789
// eslint-disable-next-line no-console
78-
console.warn('Error getting image urls from markdown:', error);
90+
console.warn('Error getting asset urls from markdown:', error);
7991
return [];
8092
}
8193
};
8294

8395
/**
84-
* Function to sanitize the image mappings.
96+
* Function to sanitize the asset url mappings.
8597
* Delete all entries with empty values (meaning that they have not been used in any jcr page).
86-
* @param {Map} imageMap - The image mapping
87-
* @returns {Map} The sanitized image mapping
98+
* @param {Map} assetMap - The asset url mapping
99+
* @returns {Map} The sanitized asset url mapping
88100
*/
89-
const sanitizeImageMappings = (imageMap) => (
90-
new Map([...imageMap].filter((entry) => entry[1] != null && entry[1] !== ''))
101+
const sanitizeAssetMappings = (assetMap) => (
102+
new Map([...assetMap].filter((entry) => entry[1] != null && entry[1] !== ''))
91103
);
92104

93105
export {
94106
// eslint-disable-next-line import/prefer-default-export
95-
getImageUrlsFromMarkdown,
96-
sanitizeImageMappings,
107+
getAssetUrlsFromMarkdown,
108+
sanitizeAssetMappings,
97109
};

src/package/packaging.js

+19-19
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,10 @@ import {
2222
traverseAndUpdateAssetReferences,
2323
} from './packaging.utils.js';
2424
import { saveFile } from '../shared/filesystem.js';
25-
import { sanitizeImageMappings } from './image-mapping.js';
25+
import { sanitizeAssetMappings } from './asset-mapping.js';
2626

2727
let jcrPages = [];
28-
const IMAGE_MAPPING_FILE = 'image-mappings.json';
28+
const ASSET_MAPPING_FILE = 'asset-mappings.json';
2929

3030
const init = () => {
3131
jcrPages = [];
@@ -41,10 +41,10 @@ const addPage = async (page, dir, prefix, zip) => {
4141
* @param xml - The xml content of the page
4242
* @param pageUrl - The url of the site page
4343
* @param assetFolderName - The name of the asset folder(s) in AEM
44-
* @param imageMappings - A map to store the image urls and their corresponding jcr paths
44+
* @param assetMappings - A map to store the asset urls and their corresponding jcr paths
4545
* @returns {Promise<*|string>} - The updated xml content
4646
*/
47-
export const updateAssetReferences = async (xml, pageUrl, assetFolderName, imageMappings) => {
47+
export const updateAssetReferences = async (xml, pageUrl, assetFolderName, assetMappings) => {
4848
let doc;
4949
try {
5050
doc = getParsedXml(xml);
@@ -55,14 +55,14 @@ export const updateAssetReferences = async (xml, pageUrl, assetFolderName, image
5555
}
5656

5757
// Start traversal from the document root and update the asset references
58-
traverseAndUpdateAssetReferences(doc.documentElement, pageUrl, assetFolderName, imageMappings);
58+
traverseAndUpdateAssetReferences(doc.documentElement, pageUrl, assetFolderName, assetMappings);
5959

6060
const serializer = new XMLSerializer();
6161
return serializer.serializeToString(doc);
6262
};
6363

6464
// eslint-disable-next-line max-len
65-
export const getJcrPages = async (pages, siteFolderName, assetFolderName, imageMappings) => Promise.all(pages.map(async (page) => ({
65+
export const getJcrPages = async (pages, siteFolderName, assetFolderName, assetMappings) => Promise.all(pages.map(async (page) => ({
6666
path: page.path,
6767
sourceXml: page.data,
6868
pageProperties: getPageProperties(page.data),
@@ -71,7 +71,7 @@ export const getJcrPages = async (pages, siteFolderName, assetFolderName, imageM
7171
page.data,
7272
page.url,
7373
assetFolderName,
74-
imageMappings,
74+
assetMappings,
7575
),
7676
jcrPath: getJcrPagePath(page.path, siteFolderName),
7777
contentXmlPath: `jcr_root${getJcrPagePath(page.path, siteFolderName)}/.content.xml`,
@@ -121,34 +121,34 @@ const getEmptyAncestorPages = (pages) => {
121121
};
122122

123123
/**
124-
* Sanitizes (deleting entries without jcr path mapping) and saves the image mappings to a file.
125-
* @param {Array<string>} imageUrls - An array of image urls that were found in the markdown.
124+
* Sanitizes (deleting entries without jcr path mapping) and saves the asset mappings to a file.
125+
* @param {*} assetMappings - The asset mappings
126126
* @param {*} outputDirectory - The directory handle
127127
*/
128-
const sanitizeAndSaveImageMappings = async (imageMappings, outputDirectory) => {
129-
// Sanitize the image mappings
130-
const sanitizedMappings = sanitizeImageMappings(imageMappings);
128+
const sanitizeAndSaveAssetMappings = async (assetMappings, outputDirectory) => {
129+
// Sanitize the asset mappings
130+
const sanitizedMappings = sanitizeAssetMappings(assetMappings);
131131

132132
// Convert Map to a plain object
133133
const obj = Object.fromEntries(sanitizedMappings);
134134

135-
// Save the updated image mapping content into a file
136-
await saveFile(outputDirectory, IMAGE_MAPPING_FILE, JSON.stringify(obj, null, 2));
135+
// Save the updated asset mapping content into a file
136+
await saveFile(outputDirectory, ASSET_MAPPING_FILE, JSON.stringify(obj, null, 2));
137137
};
138138

139139
/**
140140
* Creates a JCR content package from a directory containing pages.
141141
* @param {*} outputDirectory - The directory handle
142142
* @param {Array} pages - An array of pages
143-
* @param {Array<string>} imageUrls - An array of image urls that were found in the markdown.
143+
* @param {Array<string>} assetUrls - An array of asset urls that were found in the markdown.
144144
* @param {string} siteFolderName - The name of the site folder(s) in AEM
145145
* @param {string} assetFolderName - The name of the asset folder(s) in AEM
146146
* @returns {Promise} The file handle for the generated package.
147147
*/
148148
export const createJcrPackage = async (
149149
outputDirectory,
150150
pages,
151-
imageUrls,
151+
assetUrls,
152152
siteFolderName,
153153
assetFolderName,
154154
) => {
@@ -162,10 +162,10 @@ export const createJcrPackage = async (
162162
const prefix = 'jcr';
163163

164164
// create a map using the provided asset urls as keys (values will be populated later)
165-
const imageMappings = new Map(imageUrls.map((url) => [url, '']));
165+
const assetMappings = new Map(assetUrls.map((url) => [url, '']));
166166

167167
// add the pages
168-
jcrPages = await getJcrPages(pages, siteFolderName, assetFolderName, imageMappings);
168+
jcrPages = await getJcrPages(pages, siteFolderName, assetFolderName, assetMappings);
169169
for (let i = 0; i < jcrPages.length; i += 1) {
170170
const page = jcrPages[i];
171171
// eslint-disable-next-line no-await-in-loop
@@ -192,5 +192,5 @@ export const createJcrPackage = async (
192192
await zip.generateAsync({ type: outputType })
193193
.then(async (blob) => saveFile(outputDirectory, `${packageName}.zip`, blob));
194194

195-
await sanitizeAndSaveImageMappings(imageMappings, outputDirectory);
195+
await sanitizeAndSaveAssetMappings(assetMappings, outputDirectory);
196196
};

test/package/image-mapping.test.js

+16-7
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
*/
1212
/* eslint-env mocha */
1313
import { expect } from 'chai';
14-
import { getImageUrlsFromMarkdown, sanitizeImageMappings } from '../../src/package/image-mapping.js';
14+
import { getAssetUrlsFromMarkdown, sanitizeAssetMappings } from '../../src/package/asset-mapping.js';
1515

1616
describe('getImageUrlsFromMarkdown', () => {
1717
it('should return an array of image urls (reference urls)', () => {
@@ -25,7 +25,7 @@ describe('getImageUrlsFromMarkdown', () => {
2525
[image0]: https://aem.live/car.jpeg
2626
[image1]: https://aem.live/car2.jpeg`;
2727

28-
const imageUrls = getImageUrlsFromMarkdown(markdownContent);
28+
const imageUrls = getAssetUrlsFromMarkdown(markdownContent);
2929
expect(imageUrls).to.have.lengthOf(2);
3030
expect(imageUrls[0]).to.equal('https://aem.live/car.jpeg');
3131
expect(imageUrls[1]).to.equal('https://aem.live/car2.jpeg');
@@ -39,16 +39,25 @@ describe('getImageUrlsFromMarkdown', () => {
3939
| ![Car 2](https://aem.live/car2.jpeg) |
4040
+------------------------------------------+`;
4141

42-
const imageUrls = getImageUrlsFromMarkdown(markdownContent);
42+
const imageUrls = getAssetUrlsFromMarkdown(markdownContent);
4343
expect(imageUrls).to.have.lengthOf(2);
4444
expect(imageUrls[0]).to.equal('https://aem.live/car.jpeg');
4545
expect(imageUrls[1]).to.equal('https://aem.live/car2.jpeg');
4646
});
4747

48+
it('should return an non-image asset (pdf) url', () => {
49+
const markdownContent = `Click [here](/content/dam/doe/foo/bar.pdf) to download the handy guide.
50+
Also check [here](https://example.live/siteFoo.html).`;
51+
52+
const imageUrls = getAssetUrlsFromMarkdown(markdownContent);
53+
expect(imageUrls).to.have.lengthOf(1);
54+
expect(imageUrls[0]).to.equal('/content/dam/doe/foo/bar.pdf');
55+
});
56+
4857
it('should return an array with no image urls', () => {
4958
const markdownContent = 'This is a markdown file with no images.';
5059

51-
const imageUrls = getImageUrlsFromMarkdown(markdownContent);
60+
const imageUrls = getAssetUrlsFromMarkdown(markdownContent);
5261
expect(imageUrls).to.have.lengthOf(0);
5362
});
5463

@@ -62,7 +71,7 @@ describe('getImageUrlsFromMarkdown', () => {
6271
6372
[image0]: /test/car2.jpeg`;
6473

65-
const imageUrls = getImageUrlsFromMarkdown(markdownContent);
74+
const imageUrls = getAssetUrlsFromMarkdown(markdownContent);
6675
expect(imageUrls).to.have.lengthOf(2);
6776
expect(imageUrls[0]).to.equal('/car.jpeg');
6877
expect(imageUrls[1]).to.equal('/test/car2.jpeg');
@@ -77,7 +86,7 @@ describe('getImageUrlsFromMarkdown', () => {
7786
['key5', 'value5'],
7887
]);
7988

80-
const result = sanitizeImageMappings(imageMap);
89+
const result = sanitizeAssetMappings(imageMap);
8190

8291
expect(result.size).to.equal(2);
8392
expect(result.has('key1')).to.equal(true);
@@ -94,7 +103,7 @@ describe('getImageUrlsFromMarkdown', () => {
94103
['key3', undefined],
95104
]);
96105

97-
const result = sanitizeImageMappings(imageMap);
106+
const result = sanitizeAssetMappings(imageMap);
98107

99108
expect(result.size).to.equal(0);
100109
});

0 commit comments

Comments
 (0)